{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 10525, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9993383884429932, "epoch": 0.0, "learning_rate": 1.998289786223278e-05, "loss": 1.8842, "step": 10, "task_loss": 0.6696395874023438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.9561816453933716, "epoch": 0.01, "learning_rate": 1.996389548693587e-05, "loss": 1.8112, "step": 20, "task_loss": 0.6945114135742188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.705786108970642, "epoch": 0.01, "learning_rate": 1.9944893111638956e-05, "loss": 1.7851, "step": 30, "task_loss": 0.599273681640625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.6281533241271973, "epoch": 0.02, "learning_rate": 1.9925890736342042e-05, "loss": 1.5178, "step": 40, "task_loss": 0.555267333984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9100077152252197, "epoch": 0.02, "learning_rate": 1.9908788598574825e-05, "loss": 1.2264, "step": 50, "task_loss": 0.27131175994873047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0724254846572876, "epoch": 0.03, "learning_rate": 1.988978622327791e-05, "loss": 0.9712, "step": 60, "task_loss": 0.4018378257751465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2103850841522217, "epoch": 0.03, "learning_rate": 1.9870783847981e-05, "loss": 0.9075, "step": 70, "task_loss": 0.5983531475067139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6644728183746338, "epoch": 0.04, "learning_rate": 1.9851781472684087e-05, "loss": 0.8009, "step": 80, "task_loss": 0.25090086460113525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.2112879753112793, "epoch": 0.04, "learning_rate": 1.9832779097387176e-05, "loss": 0.8291, "step": 90, "task_loss": 0.4889531135559082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6701904535293579, "epoch": 0.05, "learning_rate": 1.9813776722090262e-05, "loss": 0.714, "step": 100, "task_loss": 0.25854694843292236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6915435791015625, "epoch": 0.05, "learning_rate": 1.9794774346793352e-05, "loss": 0.6274, "step": 110, "task_loss": 0.3031274080276489 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6346196532249451, "epoch": 0.06, "learning_rate": 1.9775771971496438e-05, "loss": 0.7066, "step": 120, "task_loss": 0.5627198815345764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8290033340454102, "epoch": 0.06, "learning_rate": 1.9756769596199528e-05, "loss": 0.5735, "step": 130, "task_loss": 0.3992866575717926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5018503665924072, "epoch": 0.07, "learning_rate": 1.9737767220902614e-05, "loss": 0.7937, "step": 140, "task_loss": 0.16568666696548462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49170026183128357, "epoch": 0.07, "learning_rate": 1.9718764845605703e-05, "loss": 0.7142, "step": 150, "task_loss": 0.3975885510444641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5039875507354736, "epoch": 0.08, "learning_rate": 1.969976247030879e-05, "loss": 0.591, "step": 160, "task_loss": 0.24026933312416077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9136797189712524, "epoch": 0.08, "learning_rate": 1.9680760095011876e-05, "loss": 0.5924, "step": 170, "task_loss": 0.4351678192615509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7978807091712952, "epoch": 0.09, "learning_rate": 1.9661757719714965e-05, "loss": 0.7811, "step": 180, "task_loss": 0.3571215867996216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0396177768707275, "epoch": 0.09, "learning_rate": 1.9642755344418055e-05, "loss": 0.7498, "step": 190, "task_loss": 0.5442102551460266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20894655585289001, "epoch": 0.1, "learning_rate": 1.962375296912114e-05, "loss": 0.5461, "step": 200, "task_loss": 0.05690506100654602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8320358991622925, "epoch": 0.1, "learning_rate": 1.960475059382423e-05, "loss": 0.6077, "step": 210, "task_loss": 0.4803912937641144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5520890951156616, "epoch": 0.1, "learning_rate": 1.958574821852732e-05, "loss": 0.5951, "step": 220, "task_loss": 0.21851623058319092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6869984865188599, "epoch": 0.11, "learning_rate": 1.9566745843230406e-05, "loss": 0.7494, "step": 230, "task_loss": 0.30402871966362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0405832529067993, "epoch": 0.11, "learning_rate": 1.9547743467933492e-05, "loss": 0.6492, "step": 240, "task_loss": 0.5161008834838867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44516924023628235, "epoch": 0.12, "learning_rate": 1.9528741092636582e-05, "loss": 0.392, "step": 250, "task_loss": 0.1959661990404129 }, { "epoch": 0.12, "eval_accuracy": 0.8887614678899083, "eval_loss": 0.45345592498779297, "eval_runtime": 28.6659, "eval_samples_per_second": 30.419, "eval_steps_per_second": 3.802, "step": 250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5078727006912231, "epoch": 0.12, "learning_rate": 1.9509738717339668e-05, "loss": 0.6549, "step": 260, "task_loss": 0.22611352801322937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.772598147392273, "epoch": 0.13, "learning_rate": 1.9490736342042758e-05, "loss": 0.646, "step": 270, "task_loss": 0.345528244972229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5505326986312866, "epoch": 0.13, "learning_rate": 1.9471733966745844e-05, "loss": 0.5464, "step": 280, "task_loss": 0.45021820068359375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6786664128303528, "epoch": 0.14, "learning_rate": 1.9452731591448933e-05, "loss": 0.4428, "step": 290, "task_loss": 0.3662004768848419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5807572603225708, "epoch": 0.14, "learning_rate": 1.943372921615202e-05, "loss": 0.5107, "step": 300, "task_loss": 0.26058703660964966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3432176411151886, "epoch": 0.15, "learning_rate": 1.941472684085511e-05, "loss": 0.5331, "step": 310, "task_loss": 0.10017021000385284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4090806841850281, "epoch": 0.15, "learning_rate": 1.9395724465558195e-05, "loss": 0.5085, "step": 320, "task_loss": 0.1661628633737564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6101839542388916, "epoch": 0.16, "learning_rate": 1.9376722090261285e-05, "loss": 0.5719, "step": 330, "task_loss": 0.2671370506286621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6847037672996521, "epoch": 0.16, "learning_rate": 1.935771971496437e-05, "loss": 0.7526, "step": 340, "task_loss": 0.5243218541145325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 1.0114264488220215, "epoch": 0.17, "learning_rate": 1.933871733966746e-05, "loss": 0.4603, "step": 350, "task_loss": 0.5498969554901123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6255608797073364, "epoch": 0.17, "learning_rate": 1.9319714964370547e-05, "loss": 0.5996, "step": 360, "task_loss": 0.2702621519565582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.266034334897995, "epoch": 0.18, "learning_rate": 1.9300712589073636e-05, "loss": 0.4917, "step": 370, "task_loss": 0.13177835941314697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7009546756744385, "epoch": 0.18, "learning_rate": 1.9281710213776723e-05, "loss": 0.5101, "step": 380, "task_loss": 0.28630542755126953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9093587398529053, "epoch": 0.19, "learning_rate": 1.9262707838479812e-05, "loss": 0.5288, "step": 390, "task_loss": 0.5337154865264893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2342422604560852, "epoch": 0.19, "learning_rate": 1.92437054631829e-05, "loss": 0.5478, "step": 400, "task_loss": 0.10641683638095856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4991934597492218, "epoch": 0.19, "learning_rate": 1.9224703087885988e-05, "loss": 0.3909, "step": 410, "task_loss": 0.23908966779708862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4699850380420685, "epoch": 0.2, "learning_rate": 1.9205700712589074e-05, "loss": 0.4033, "step": 420, "task_loss": 0.28487616777420044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9620916247367859, "epoch": 0.2, "learning_rate": 1.9188598574821856e-05, "loss": 0.4304, "step": 430, "task_loss": 0.6841728687286377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7321962714195251, "epoch": 0.21, "learning_rate": 1.9169596199524942e-05, "loss": 0.6077, "step": 440, "task_loss": 0.4345829486846924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46323296427726746, "epoch": 0.21, "learning_rate": 1.915059382422803e-05, "loss": 0.4439, "step": 450, "task_loss": 0.17068199813365936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9150896072387695, "epoch": 0.22, "learning_rate": 1.9131591448931118e-05, "loss": 0.5283, "step": 460, "task_loss": 0.42760002613067627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3148682713508606, "epoch": 0.22, "learning_rate": 1.9112589073634208e-05, "loss": 0.459, "step": 470, "task_loss": 0.3109205663204193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.455905556678772, "epoch": 0.23, "learning_rate": 1.9093586698337294e-05, "loss": 0.4673, "step": 480, "task_loss": 0.13838760554790497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25112056732177734, "epoch": 0.23, "learning_rate": 1.907458432304038e-05, "loss": 0.5656, "step": 490, "task_loss": 0.1593867689371109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8803852796554565, "epoch": 0.24, "learning_rate": 1.905558194774347e-05, "loss": 0.4413, "step": 500, "task_loss": 0.46672773361206055 }, { "epoch": 0.24, "eval_accuracy": 0.8899082568807339, "eval_loss": 0.4671143591403961, "eval_runtime": 23.1207, "eval_samples_per_second": 37.715, "eval_steps_per_second": 4.714, "step": 500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4302404522895813, "epoch": 0.24, "learning_rate": 1.9036579572446556e-05, "loss": 0.3487, "step": 510, "task_loss": 0.34454867243766785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.339828759431839, "epoch": 0.25, "learning_rate": 1.9017577197149645e-05, "loss": 0.417, "step": 520, "task_loss": 0.14348742365837097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4093138575553894, "epoch": 0.25, "learning_rate": 1.8998574821852735e-05, "loss": 0.6335, "step": 530, "task_loss": 0.2882145345211029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2194601595401764, "epoch": 0.26, "learning_rate": 1.897957244655582e-05, "loss": 0.4188, "step": 540, "task_loss": 0.058180660009384155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5428069829940796, "epoch": 0.26, "learning_rate": 1.8960570071258907e-05, "loss": 0.4371, "step": 550, "task_loss": 0.259736031293869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8619398474693298, "epoch": 0.27, "learning_rate": 1.8941567695961997e-05, "loss": 0.5938, "step": 560, "task_loss": 0.5211101174354553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1949978768825531, "epoch": 0.27, "learning_rate": 1.8922565320665086e-05, "loss": 0.3784, "step": 570, "task_loss": 0.20978039503097534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4044320583343506, "epoch": 0.28, "learning_rate": 1.8903562945368172e-05, "loss": 0.3817, "step": 580, "task_loss": 0.22608956694602966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7097648978233337, "epoch": 0.28, "learning_rate": 1.888456057007126e-05, "loss": 0.413, "step": 590, "task_loss": 0.5404372811317444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6512647867202759, "epoch": 0.29, "learning_rate": 1.8865558194774348e-05, "loss": 0.5914, "step": 600, "task_loss": 0.2793722450733185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3936828374862671, "epoch": 0.29, "learning_rate": 1.8846555819477438e-05, "loss": 0.3537, "step": 610, "task_loss": 0.1763158142566681 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5243096947669983, "epoch": 0.29, "learning_rate": 1.8827553444180524e-05, "loss": 0.3381, "step": 620, "task_loss": 0.24796336889266968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32880261540412903, "epoch": 0.3, "learning_rate": 1.880855106888361e-05, "loss": 0.329, "step": 630, "task_loss": 0.1616968810558319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3982900381088257, "epoch": 0.3, "learning_rate": 1.87895486935867e-05, "loss": 0.3864, "step": 640, "task_loss": 0.2720021903514862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.59184730052948, "epoch": 0.31, "learning_rate": 1.877054631828979e-05, "loss": 0.4413, "step": 650, "task_loss": 0.41478922963142395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2740156352519989, "epoch": 0.31, "learning_rate": 1.8751543942992875e-05, "loss": 0.363, "step": 660, "task_loss": 0.04690548777580261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2690792977809906, "epoch": 0.32, "learning_rate": 1.873254156769596e-05, "loss": 0.3805, "step": 670, "task_loss": 0.13159048557281494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6759920120239258, "epoch": 0.32, "learning_rate": 1.871353919239905e-05, "loss": 0.3392, "step": 680, "task_loss": 0.37233513593673706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2218218445777893, "epoch": 0.33, "learning_rate": 1.869453681710214e-05, "loss": 0.4838, "step": 690, "task_loss": 0.0667574405670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35146889090538025, "epoch": 0.33, "learning_rate": 1.8675534441805227e-05, "loss": 0.3663, "step": 700, "task_loss": 0.19517013430595398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4373435974121094, "epoch": 0.34, "learning_rate": 1.8656532066508316e-05, "loss": 0.2644, "step": 710, "task_loss": 0.3898613750934601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4680957794189453, "epoch": 0.34, "learning_rate": 1.8637529691211403e-05, "loss": 0.4633, "step": 720, "task_loss": 0.2719492018222809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9040226936340332, "epoch": 0.35, "learning_rate": 1.8618527315914492e-05, "loss": 0.4141, "step": 730, "task_loss": 0.6466548442840576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5508012771606445, "epoch": 0.35, "learning_rate": 1.8599524940617578e-05, "loss": 0.3819, "step": 740, "task_loss": 0.3917207717895508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10818565636873245, "epoch": 0.36, "learning_rate": 1.8580522565320668e-05, "loss": 0.29, "step": 750, "task_loss": 0.033290036022663116 }, { "epoch": 0.36, "eval_accuracy": 0.9128440366972477, "eval_loss": 0.32853972911834717, "eval_runtime": 31.1664, "eval_samples_per_second": 27.979, "eval_steps_per_second": 3.497, "step": 750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5789792537689209, "epoch": 0.36, "learning_rate": 1.8561520190023754e-05, "loss": 0.3252, "step": 760, "task_loss": 0.3467680811882019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1704629510641098, "epoch": 0.37, "learning_rate": 1.854251781472684e-05, "loss": 0.4369, "step": 770, "task_loss": 0.11706624180078506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4032849967479706, "epoch": 0.37, "learning_rate": 1.852351543942993e-05, "loss": 0.4449, "step": 780, "task_loss": 0.26390761137008667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3772156238555908, "epoch": 0.38, "learning_rate": 1.850451306413302e-05, "loss": 0.3257, "step": 790, "task_loss": 0.3016626834869385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39227956533432007, "epoch": 0.38, "learning_rate": 1.8485510688836105e-05, "loss": 0.412, "step": 800, "task_loss": 0.2939774990081787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3966776728630066, "epoch": 0.38, "learning_rate": 1.846650831353919e-05, "loss": 0.4206, "step": 810, "task_loss": 0.32761743664741516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14727869629859924, "epoch": 0.39, "learning_rate": 1.844750593824228e-05, "loss": 0.4085, "step": 820, "task_loss": 0.03596585988998413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5814430713653564, "epoch": 0.39, "learning_rate": 1.842850356294537e-05, "loss": 0.5506, "step": 830, "task_loss": 0.2807302474975586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3324885964393616, "epoch": 0.4, "learning_rate": 1.8409501187648457e-05, "loss": 0.2877, "step": 840, "task_loss": 0.11497768759727478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4697301387786865, "epoch": 0.4, "learning_rate": 1.8390498812351546e-05, "loss": 0.4512, "step": 850, "task_loss": 0.42074650526046753 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1672717034816742, "epoch": 0.41, "learning_rate": 1.8371496437054633e-05, "loss": 0.3373, "step": 860, "task_loss": 0.10989774763584137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.07770746946334839, "epoch": 0.41, "learning_rate": 1.8352494061757722e-05, "loss": 0.3943, "step": 870, "task_loss": 0.013416633009910583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4550301730632782, "epoch": 0.42, "learning_rate": 1.833349168646081e-05, "loss": 0.3908, "step": 880, "task_loss": 0.20334404706954956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14840258657932281, "epoch": 0.42, "learning_rate": 1.8314489311163898e-05, "loss": 0.2848, "step": 890, "task_loss": 0.0623587965965271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10735790431499481, "epoch": 0.43, "learning_rate": 1.8295486935866984e-05, "loss": 0.3985, "step": 900, "task_loss": 0.09991317242383957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.09649023413658142, "epoch": 0.43, "learning_rate": 1.8276484560570074e-05, "loss": 0.2586, "step": 910, "task_loss": 0.022964343428611755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.565139651298523, "epoch": 0.44, "learning_rate": 1.825748218527316e-05, "loss": 0.3881, "step": 920, "task_loss": 0.3771653473377228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.07271742820739746, "epoch": 0.44, "learning_rate": 1.823847980997625e-05, "loss": 0.3163, "step": 930, "task_loss": 0.1788436472415924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5031943321228027, "epoch": 0.45, "learning_rate": 1.8219477434679336e-05, "loss": 0.379, "step": 940, "task_loss": 0.3599282503128052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14190588891506195, "epoch": 0.45, "learning_rate": 1.8200475059382425e-05, "loss": 0.2631, "step": 950, "task_loss": 0.2649960517883301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8079104423522949, "epoch": 0.46, "learning_rate": 1.818147268408551e-05, "loss": 0.4097, "step": 960, "task_loss": 0.49955570697784424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32833898067474365, "epoch": 0.46, "learning_rate": 1.81624703087886e-05, "loss": 0.2893, "step": 970, "task_loss": 0.13918891549110413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8008242845535278, "epoch": 0.47, "learning_rate": 1.8143467933491687e-05, "loss": 0.3336, "step": 980, "task_loss": 0.39846181869506836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4929274320602417, "epoch": 0.47, "learning_rate": 1.8124465558194773e-05, "loss": 0.3877, "step": 990, "task_loss": 0.2788354456424713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24550215899944305, "epoch": 0.48, "learning_rate": 1.8105463182897863e-05, "loss": 0.2851, "step": 1000, "task_loss": 0.10739203542470932 }, { "epoch": 0.48, "eval_accuracy": 0.9151376146788991, "eval_loss": 0.24980628490447998, "eval_runtime": 24.2778, "eval_samples_per_second": 35.918, "eval_steps_per_second": 4.49, "step": 1000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32931697368621826, "epoch": 0.48, "learning_rate": 1.8086460807600952e-05, "loss": 0.2591, "step": 1010, "task_loss": 0.17109191417694092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19154950976371765, "epoch": 0.48, "learning_rate": 1.806745843230404e-05, "loss": 0.3552, "step": 1020, "task_loss": 0.13995346426963806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2809268534183502, "epoch": 0.49, "learning_rate": 1.8048456057007128e-05, "loss": 0.2895, "step": 1030, "task_loss": 0.18339580297470093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7353945970535278, "epoch": 0.49, "learning_rate": 1.8029453681710218e-05, "loss": 0.4529, "step": 1040, "task_loss": 0.516687273979187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1993633657693863, "epoch": 0.5, "learning_rate": 1.8010451306413304e-05, "loss": 0.3296, "step": 1050, "task_loss": 0.4725501835346222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17681953310966492, "epoch": 0.5, "learning_rate": 1.799144893111639e-05, "loss": 0.3377, "step": 1060, "task_loss": 0.06336037814617157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13305272161960602, "epoch": 0.51, "learning_rate": 1.797244655581948e-05, "loss": 0.2699, "step": 1070, "task_loss": 0.2189784049987793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5291624665260315, "epoch": 0.51, "learning_rate": 1.795344418052257e-05, "loss": 0.2867, "step": 1080, "task_loss": 0.299777090549469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11125596612691879, "epoch": 0.52, "learning_rate": 1.7934441805225655e-05, "loss": 0.2417, "step": 1090, "task_loss": 0.0719640851020813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6146207451820374, "epoch": 0.52, "learning_rate": 1.791543942992874e-05, "loss": 0.3645, "step": 1100, "task_loss": 0.3506692051887512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43747201561927795, "epoch": 0.53, "learning_rate": 1.789643705463183e-05, "loss": 0.2669, "step": 1110, "task_loss": 0.2219913899898529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3032437562942505, "epoch": 0.53, "learning_rate": 1.7877434679334917e-05, "loss": 0.2974, "step": 1120, "task_loss": 0.12568572163581848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3314521610736847, "epoch": 0.54, "learning_rate": 1.7858432304038007e-05, "loss": 0.2802, "step": 1130, "task_loss": 0.1527169644832611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31865638494491577, "epoch": 0.54, "learning_rate": 1.7839429928741093e-05, "loss": 0.3081, "step": 1140, "task_loss": 0.22448736429214478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6496268510818481, "epoch": 0.55, "learning_rate": 1.7820427553444182e-05, "loss": 0.3204, "step": 1150, "task_loss": 0.40611302852630615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6914563179016113, "epoch": 0.55, "learning_rate": 1.780142517814727e-05, "loss": 0.2414, "step": 1160, "task_loss": 0.3544915020465851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.152155801653862, "epoch": 0.56, "learning_rate": 1.7782422802850358e-05, "loss": 0.3349, "step": 1170, "task_loss": 0.07939426600933075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48079612851142883, "epoch": 0.56, "learning_rate": 1.7763420427553448e-05, "loss": 0.3187, "step": 1180, "task_loss": 0.44884148240089417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.05774353817105293, "epoch": 0.57, "learning_rate": 1.7744418052256534e-05, "loss": 0.354, "step": 1190, "task_loss": 0.007377400994300842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11594004929065704, "epoch": 0.57, "learning_rate": 1.772541567695962e-05, "loss": 0.3231, "step": 1200, "task_loss": 0.03987376019358635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19646455347537994, "epoch": 0.57, "learning_rate": 1.770641330166271e-05, "loss": 0.354, "step": 1210, "task_loss": 0.19836881756782532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24901585280895233, "epoch": 0.58, "learning_rate": 1.76874109263658e-05, "loss": 0.3213, "step": 1220, "task_loss": 0.055774152278900146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.223684161901474, "epoch": 0.58, "learning_rate": 1.7668408551068885e-05, "loss": 0.2828, "step": 1230, "task_loss": 0.01634085178375244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2887876033782959, "epoch": 0.59, "learning_rate": 1.764940617577197e-05, "loss": 0.331, "step": 1240, "task_loss": 0.1671813428401947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4364287257194519, "epoch": 0.59, "learning_rate": 1.763040380047506e-05, "loss": 0.3717, "step": 1250, "task_loss": 0.1999722123146057 }, { "epoch": 0.59, "eval_accuracy": 0.9243119266055045, "eval_loss": 0.2037193924188614, "eval_runtime": 23.0016, "eval_samples_per_second": 37.91, "eval_steps_per_second": 4.739, "step": 1250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.017401084303855896, "epoch": 0.6, "learning_rate": 1.761140142517815e-05, "loss": 0.1969, "step": 1260, "task_loss": 0.00458671897649765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26557794213294983, "epoch": 0.6, "learning_rate": 1.7592399049881237e-05, "loss": 0.493, "step": 1270, "task_loss": 0.1353331208229065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45439109206199646, "epoch": 0.61, "learning_rate": 1.7573396674584323e-05, "loss": 0.3094, "step": 1280, "task_loss": 0.35616135597229004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3577348589897156, "epoch": 0.61, "learning_rate": 1.7554394299287412e-05, "loss": 0.3268, "step": 1290, "task_loss": 0.12562984228134155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35081303119659424, "epoch": 0.62, "learning_rate": 1.7535391923990502e-05, "loss": 0.3708, "step": 1300, "task_loss": 0.22274452447891235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2592296004295349, "epoch": 0.62, "learning_rate": 1.7516389548693588e-05, "loss": 0.4674, "step": 1310, "task_loss": 0.09062568843364716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18336069583892822, "epoch": 0.63, "learning_rate": 1.7497387173396674e-05, "loss": 0.2377, "step": 1320, "task_loss": 0.055865660309791565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3750176429748535, "epoch": 0.63, "learning_rate": 1.7478384798099764e-05, "loss": 0.2343, "step": 1330, "task_loss": 0.4618932902812958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.277113139629364, "epoch": 0.64, "learning_rate": 1.745938242280285e-05, "loss": 0.4476, "step": 1340, "task_loss": 0.11207205802202225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37496018409729004, "epoch": 0.64, "learning_rate": 1.744038004750594e-05, "loss": 0.3001, "step": 1350, "task_loss": 0.2029455304145813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.08501767367124557, "epoch": 0.65, "learning_rate": 1.742137767220903e-05, "loss": 0.2503, "step": 1360, "task_loss": 0.02285398542881012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14294752478599548, "epoch": 0.65, "learning_rate": 1.7402375296912115e-05, "loss": 0.1823, "step": 1370, "task_loss": 0.0727246105670929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2509918510913849, "epoch": 0.66, "learning_rate": 1.73833729216152e-05, "loss": 0.2555, "step": 1380, "task_loss": 0.12989100813865662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.068404421210289, "epoch": 0.66, "learning_rate": 1.736437054631829e-05, "loss": 0.2121, "step": 1390, "task_loss": 0.10038695484399796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.04248424619436264, "epoch": 0.67, "learning_rate": 1.734536817102138e-05, "loss": 0.22, "step": 1400, "task_loss": 0.08192986994981766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.0670127123594284, "epoch": 0.67, "learning_rate": 1.7326365795724467e-05, "loss": 0.2685, "step": 1410, "task_loss": 0.07222311943769455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37206175923347473, "epoch": 0.67, "learning_rate": 1.7307363420427553e-05, "loss": 0.2674, "step": 1420, "task_loss": 0.35340070724487305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2714982032775879, "epoch": 0.68, "learning_rate": 1.7288361045130643e-05, "loss": 0.3446, "step": 1430, "task_loss": 0.2567555904388428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.07237125188112259, "epoch": 0.68, "learning_rate": 1.7269358669833732e-05, "loss": 0.197, "step": 1440, "task_loss": 0.00902317464351654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10892651230096817, "epoch": 0.69, "learning_rate": 1.7250356294536818e-05, "loss": 0.2894, "step": 1450, "task_loss": 0.13919095695018768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3349069356918335, "epoch": 0.69, "learning_rate": 1.7231353919239904e-05, "loss": 0.2457, "step": 1460, "task_loss": 0.14725209772586823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3073666989803314, "epoch": 0.7, "learning_rate": 1.7212351543942994e-05, "loss": 0.2578, "step": 1470, "task_loss": 0.07342517375946045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38823768496513367, "epoch": 0.7, "learning_rate": 1.7193349168646084e-05, "loss": 0.331, "step": 1480, "task_loss": 0.1779521405696869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2724040448665619, "epoch": 0.71, "learning_rate": 1.717434679334917e-05, "loss": 0.2154, "step": 1490, "task_loss": 0.2927827835083008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8642085790634155, "epoch": 0.71, "learning_rate": 1.715534441805226e-05, "loss": 0.2467, "step": 1500, "task_loss": 0.5641911029815674 }, { "epoch": 0.71, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.28397560119628906, "eval_runtime": 22.6893, "eval_samples_per_second": 38.432, "eval_steps_per_second": 4.804, "step": 1500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.05709172412753105, "epoch": 0.72, "learning_rate": 1.7136342042755345e-05, "loss": 0.2719, "step": 1510, "task_loss": 0.025617174804210663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45153453946113586, "epoch": 0.72, "learning_rate": 1.7117339667458435e-05, "loss": 0.2957, "step": 1520, "task_loss": 0.2549566328525543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2257353961467743, "epoch": 0.73, "learning_rate": 1.709833729216152e-05, "loss": 0.2892, "step": 1530, "task_loss": 0.5615079998970032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16512510180473328, "epoch": 0.73, "learning_rate": 1.707933491686461e-05, "loss": 0.1975, "step": 1540, "task_loss": 0.09258662164211273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4988386929035187, "epoch": 0.74, "learning_rate": 1.7060332541567697e-05, "loss": 0.3206, "step": 1550, "task_loss": 0.6115778088569641 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18809723854064941, "epoch": 0.74, "learning_rate": 1.7041330166270783e-05, "loss": 0.1958, "step": 1560, "task_loss": 0.10413852334022522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4897454082965851, "epoch": 0.75, "learning_rate": 1.7022327790973873e-05, "loss": 0.2965, "step": 1570, "task_loss": 0.2838747799396515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4037947654724121, "epoch": 0.75, "learning_rate": 1.7003325415676962e-05, "loss": 0.2892, "step": 1580, "task_loss": 0.25087568163871765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.05394119769334793, "epoch": 0.76, "learning_rate": 1.698432304038005e-05, "loss": 0.2991, "step": 1590, "task_loss": 0.015048503875732422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30840539932250977, "epoch": 0.76, "learning_rate": 1.6965320665083134e-05, "loss": 0.2293, "step": 1600, "task_loss": 0.634148120880127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1973361074924469, "epoch": 0.76, "learning_rate": 1.6946318289786224e-05, "loss": 0.1691, "step": 1610, "task_loss": 0.09310252964496613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3464374244213104, "epoch": 0.77, "learning_rate": 1.6927315914489314e-05, "loss": 0.2329, "step": 1620, "task_loss": 0.28547829389572144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2688274383544922, "epoch": 0.77, "learning_rate": 1.69083135391924e-05, "loss": 0.2133, "step": 1630, "task_loss": 0.1428394615650177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18282316625118256, "epoch": 0.78, "learning_rate": 1.6889311163895486e-05, "loss": 0.2413, "step": 1640, "task_loss": 0.05560823902487755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2214377224445343, "epoch": 0.78, "learning_rate": 1.687030878859858e-05, "loss": 0.2201, "step": 1650, "task_loss": 0.10058430582284927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5333009362220764, "epoch": 0.79, "learning_rate": 1.6851306413301665e-05, "loss": 0.2844, "step": 1660, "task_loss": 0.47247129678726196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2876150906085968, "epoch": 0.79, "learning_rate": 1.683230403800475e-05, "loss": 0.3131, "step": 1670, "task_loss": 0.09321459382772446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22139661014080048, "epoch": 0.8, "learning_rate": 1.681330166270784e-05, "loss": 0.3365, "step": 1680, "task_loss": 0.2087613344192505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21150922775268555, "epoch": 0.8, "learning_rate": 1.6794299287410927e-05, "loss": 0.1991, "step": 1690, "task_loss": 0.1047440767288208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3274872303009033, "epoch": 0.81, "learning_rate": 1.6775296912114017e-05, "loss": 0.2242, "step": 1700, "task_loss": 0.2223893105983734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11536785960197449, "epoch": 0.81, "learning_rate": 1.6756294536817103e-05, "loss": 0.3133, "step": 1710, "task_loss": 0.07253136485815048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3560354709625244, "epoch": 0.82, "learning_rate": 1.6737292161520192e-05, "loss": 0.214, "step": 1720, "task_loss": 0.32433855533599854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25568869709968567, "epoch": 0.82, "learning_rate": 1.671828978622328e-05, "loss": 0.2308, "step": 1730, "task_loss": 0.1160731092095375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5275914669036865, "epoch": 0.83, "learning_rate": 1.6699287410926368e-05, "loss": 0.3426, "step": 1740, "task_loss": 0.3464980125427246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33361896872520447, "epoch": 0.83, "learning_rate": 1.6680285035629454e-05, "loss": 0.2114, "step": 1750, "task_loss": 0.37090158462524414 }, { "epoch": 0.83, "eval_accuracy": 0.9243119266055045, "eval_loss": 0.2238595187664032, "eval_runtime": 28.7109, "eval_samples_per_second": 30.372, "eval_steps_per_second": 3.796, "step": 1750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.09009531885385513, "epoch": 0.84, "learning_rate": 1.6661282660332544e-05, "loss": 0.3014, "step": 1760, "task_loss": 0.005033731460571289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25561287999153137, "epoch": 0.84, "learning_rate": 1.664228028503563e-05, "loss": 0.2109, "step": 1770, "task_loss": 0.12138234078884125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.0638018250465393, "epoch": 0.85, "learning_rate": 1.662327790973872e-05, "loss": 0.2247, "step": 1780, "task_loss": 0.013234104961156845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2950674891471863, "epoch": 0.85, "learning_rate": 1.6604275534441806e-05, "loss": 0.2714, "step": 1790, "task_loss": 0.13912129402160645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27107563614845276, "epoch": 0.86, "learning_rate": 1.6585273159144895e-05, "loss": 0.3252, "step": 1800, "task_loss": 0.16670764982700348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.07524740695953369, "epoch": 0.86, "learning_rate": 1.656627078384798e-05, "loss": 0.2284, "step": 1810, "task_loss": 0.01689385622739792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49834099411964417, "epoch": 0.86, "learning_rate": 1.654726840855107e-05, "loss": 0.2676, "step": 1820, "task_loss": 0.21745765209197998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5565487742424011, "epoch": 0.87, "learning_rate": 1.652826603325416e-05, "loss": 0.3219, "step": 1830, "task_loss": 0.20299580693244934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1903807818889618, "epoch": 0.87, "learning_rate": 1.6509263657957247e-05, "loss": 0.2616, "step": 1840, "task_loss": 0.14985938370227814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.07635773718357086, "epoch": 0.88, "learning_rate": 1.6490261282660333e-05, "loss": 0.2512, "step": 1850, "task_loss": 0.20876792073249817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4339378774166107, "epoch": 0.88, "learning_rate": 1.6471258907363422e-05, "loss": 0.3134, "step": 1860, "task_loss": 0.23869368433952332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5155743360519409, "epoch": 0.89, "learning_rate": 1.6452256532066512e-05, "loss": 0.2005, "step": 1870, "task_loss": 0.27464839816093445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11325311660766602, "epoch": 0.89, "learning_rate": 1.6433254156769598e-05, "loss": 0.271, "step": 1880, "task_loss": 0.07444935292005539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11618862301111221, "epoch": 0.9, "learning_rate": 1.6414251781472684e-05, "loss": 0.2204, "step": 1890, "task_loss": 0.03171085566282272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.08980407565832138, "epoch": 0.9, "learning_rate": 1.6395249406175774e-05, "loss": 0.2843, "step": 1900, "task_loss": 0.09885088354349136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2316761016845703, "epoch": 0.91, "learning_rate": 1.637624703087886e-05, "loss": 0.1765, "step": 1910, "task_loss": 0.12237384915351868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.03400692343711853, "epoch": 0.91, "learning_rate": 1.635724465558195e-05, "loss": 0.2091, "step": 1920, "task_loss": 0.006525538861751556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.03192061930894852, "epoch": 0.92, "learning_rate": 1.6338242280285036e-05, "loss": 0.2077, "step": 1930, "task_loss": 0.005565345287322998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.056986477226018906, "epoch": 0.92, "learning_rate": 1.6319239904988125e-05, "loss": 0.2957, "step": 1940, "task_loss": 0.011667303740978241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.03923925757408142, "epoch": 0.93, "learning_rate": 1.630023752969121e-05, "loss": 0.2349, "step": 1950, "task_loss": 0.07591888308525085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30294090509414673, "epoch": 0.93, "learning_rate": 1.62812351543943e-05, "loss": 0.2222, "step": 1960, "task_loss": 0.21820741891860962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23649045825004578, "epoch": 0.94, "learning_rate": 1.626223277909739e-05, "loss": 0.2035, "step": 1970, "task_loss": 0.21682609617710114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39186713099479675, "epoch": 0.94, "learning_rate": 1.6243230403800477e-05, "loss": 0.2666, "step": 1980, "task_loss": 0.11818552017211914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3313908576965332, "epoch": 0.95, "learning_rate": 1.6224228028503563e-05, "loss": 0.3135, "step": 1990, "task_loss": 0.2329636514186859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.314828097820282, "epoch": 0.95, "learning_rate": 1.6205225653206652e-05, "loss": 0.1777, "step": 2000, "task_loss": 0.31898385286331177 }, { "epoch": 0.95, "eval_accuracy": 0.926605504587156, "eval_loss": 0.1968172788619995, "eval_runtime": 29.4372, "eval_samples_per_second": 29.622, "eval_steps_per_second": 3.703, "step": 2000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5318342447280884, "epoch": 0.95, "learning_rate": 1.6186223277909742e-05, "loss": 0.2675, "step": 2010, "task_loss": 0.5134344100952148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3911985158920288, "epoch": 0.96, "learning_rate": 1.6167220902612828e-05, "loss": 0.2213, "step": 2020, "task_loss": 0.2515479028224945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1401204764842987, "epoch": 0.96, "learning_rate": 1.6148218527315914e-05, "loss": 0.2272, "step": 2030, "task_loss": 0.09529782831668854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16212940216064453, "epoch": 0.97, "learning_rate": 1.6129216152019004e-05, "loss": 0.212, "step": 2040, "task_loss": 0.31426337361335754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21590156853199005, "epoch": 0.97, "learning_rate": 1.6110213776722093e-05, "loss": 0.1846, "step": 2050, "task_loss": 0.2860991060733795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38810980319976807, "epoch": 0.98, "learning_rate": 1.609121140142518e-05, "loss": 0.2844, "step": 2060, "task_loss": 0.39935895800590515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7248147130012512, "epoch": 0.98, "learning_rate": 1.6072209026128266e-05, "loss": 0.3505, "step": 2070, "task_loss": 0.5984583497047424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2939862608909607, "epoch": 0.99, "learning_rate": 1.6053206650831355e-05, "loss": 0.2442, "step": 2080, "task_loss": 0.15204966068267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13827058672904968, "epoch": 0.99, "learning_rate": 1.6034204275534445e-05, "loss": 0.1982, "step": 2090, "task_loss": 0.05438768118619919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18541285395622253, "epoch": 1.0, "learning_rate": 1.601520190023753e-05, "loss": 0.2719, "step": 2100, "task_loss": 0.3633626699447632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.00028449433713194084, "compression/movement_sparsity/importance_threshold": -0.0026588774864763865, "compression/movement_sparsity/linear_layer_sparsity": 0.0010491357083709726, "compression/movement_sparsity/model_sparsity": 0.0008146869027482775, "compression_loss": 0.07738782465457916, "distillation_loss": 0.10512904077768326, "epoch": 1.0, "learning_rate": 1.5996199524940617e-05, "loss": 0.1552, "step": 2110, "task_loss": 0.061604227870702744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0009910043789297608, "compression/movement_sparsity/importance_threshold": -0.002621092018561318, "compression/movement_sparsity/linear_layer_sparsity": 0.0011034303485395966, "compression/movement_sparsity/model_sparsity": 0.0008568483999520002, "compression_loss": 0.2695717215538025, "distillation_loss": 0.11719319969415665, "epoch": 1.01, "learning_rate": 1.5977197149643707e-05, "loss": 0.3396, "step": 2120, "task_loss": 0.3348212242126465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.001690789007294047, "compression/movement_sparsity/importance_threshold": -0.0025836662382352064, "compression/movement_sparsity/linear_layer_sparsity": 0.0012307216388136105, "compression/movement_sparsity/model_sparsity": 0.0009556940937862046, "compression_loss": 0.4599255919456482, "distillation_loss": 0.0449785441160202, "epoch": 1.01, "learning_rate": 1.5958194774346796e-05, "loss": 0.5465, "step": 2130, "task_loss": 0.017805740237236023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0023838803858471425, "compression/movement_sparsity/importance_threshold": -0.002546598425327852, "compression/movement_sparsity/linear_layer_sparsity": 0.0014600153380006022, "compression/movement_sparsity/model_sparsity": 0.0011337478690222034, "compression_loss": 0.6484553217887878, "distillation_loss": 0.08259381353855133, "epoch": 1.02, "learning_rate": 1.5939192399049882e-05, "loss": 0.7333, "step": 2140, "task_loss": 0.034424424171447754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0030703106782113913, "compression/movement_sparsity/importance_threshold": -0.0025098868596690545, "compression/movement_sparsity/linear_layer_sparsity": 0.0017334528380006022, "compression/movement_sparsity/model_sparsity": 0.001346080695169288, "compression_loss": 0.8351697325706482, "distillation_loss": 0.07959377765655518, "epoch": 1.02, "learning_rate": 1.5920190023752972e-05, "loss": 0.9215, "step": 2150, "task_loss": 0.015001043677330017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0037501120480091413, "compression/movement_sparsity/importance_threshold": -0.0024735298210886133, "compression/movement_sparsity/linear_layer_sparsity": 0.0020618201407708522, "compression/movement_sparsity/model_sparsity": 0.0016010682422742115, "compression_loss": 1.0200759172439575, "distillation_loss": 0.38012033700942993, "epoch": 1.03, "learning_rate": 1.5901187648456058e-05, "loss": 1.1094, "step": 2160, "task_loss": 0.1993572860956192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.004423316658862747, "compression/movement_sparsity/importance_threshold": -0.0024375255894163272, "compression/movement_sparsity/linear_layer_sparsity": 0.0023032407407407407, "compression/movement_sparsity/model_sparsity": 0.0017885389377045428, "compression_loss": 1.2031831741333008, "distillation_loss": 0.09336289763450623, "epoch": 1.03, "learning_rate": 1.5882185273159144e-05, "loss": 1.319, "step": 2170, "task_loss": 0.3724411129951477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.005089956674394564, "compression/movement_sparsity/importance_threshold": -0.0024018724444819957, "compression/movement_sparsity/linear_layer_sparsity": 0.00264551104712436, "compression/movement_sparsity/model_sparsity": 0.00205432260476068, "compression_loss": 1.384499192237854, "distillation_loss": 0.1010119616985321, "epoch": 1.04, "learning_rate": 1.5863182897862234e-05, "loss": 1.4167, "step": 2180, "task_loss": 0.05338115245103836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0057500642582269225, "compression/movement_sparsity/importance_threshold": -0.002366568666115419, "compression/movement_sparsity/linear_layer_sparsity": 0.0030340894120746763, "compression/movement_sparsity/model_sparsity": 0.0023560659370011876, "compression_loss": 1.564030647277832, "distillation_loss": 0.021691124886274338, "epoch": 1.04, "learning_rate": 1.5844180522565323e-05, "loss": 1.6449, "step": 2190, "task_loss": 0.003468889743089676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.006403671573982206, "compression/movement_sparsity/importance_threshold": -0.002331612534146395, "compression/movement_sparsity/linear_layer_sparsity": 0.0036809201859379706, "compression/movement_sparsity/model_sparsity": 0.0028583503941561096, "compression_loss": 1.741767406463623, "distillation_loss": 0.07079476118087769, "epoch": 1.05, "learning_rate": 1.582517814726841e-05, "loss": 1.7437, "step": 2200, "task_loss": 0.022576410323381424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007050810785282746, "compression/movement_sparsity/importance_threshold": -0.0022970023284047234, "compression/movement_sparsity/linear_layer_sparsity": 0.00432740985395965, "compression/movement_sparsity/model_sparsity": 0.0033603699718875247, "compression_loss": 1.9177247285842896, "distillation_loss": 0.030747881159186363, "epoch": 1.05, "learning_rate": 1.5806175771971496e-05, "loss": 1.9982, "step": 2210, "task_loss": 0.005105555057525635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007691514055750898, "compression/movement_sparsity/importance_threshold": -0.002262736328720204, "compression/movement_sparsity/linear_layer_sparsity": 0.0052953741342968985, "compression/movement_sparsity/model_sparsity": 0.004112024705614379, "compression_loss": 2.0919368267059326, "distillation_loss": 0.2072967141866684, "epoch": 1.05, "learning_rate": 1.5787173396674585e-05, "loss": 2.1582, "step": 2220, "task_loss": 0.33855485916137695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.00832581354900901, "compression/movement_sparsity/importance_threshold": -0.002228812814922636, "compression/movement_sparsity/linear_layer_sparsity": 0.006365270249924722, "compression/movement_sparsity/model_sparsity": 0.004942832718103681, "compression_loss": 2.2644152641296387, "distillation_loss": 0.13281983137130737, "epoch": 1.06, "learning_rate": 1.5768171021377675e-05, "loss": 2.3957, "step": 2230, "task_loss": 0.10870643705129623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.00895374142867943, "compression/movement_sparsity/importance_threshold": -0.002195230066841819, "compression/movement_sparsity/linear_layer_sparsity": 0.007350924984944294, "compression/movement_sparsity/model_sparsity": 0.005708224646759998, "compression_loss": 2.43515682220459, "distillation_loss": 0.023621466010808945, "epoch": 1.06, "learning_rate": 1.574916864608076e-05, "loss": 2.5349, "step": 2240, "task_loss": 0.0038209035992622375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.009575329858384518, "compression/movement_sparsity/importance_threshold": -0.0021619863643075514, "compression/movement_sparsity/linear_layer_sparsity": 0.008769878236976815, "compression/movement_sparsity/model_sparsity": 0.006810086513455902, "compression_loss": 2.6041481494903564, "distillation_loss": 0.014014622196555138, "epoch": 1.07, "learning_rate": 1.5730166270783847e-05, "loss": 2.6501, "step": 2250, "task_loss": 0.002429734915494919 }, { "epoch": 1.07, "eval_accuracy": 0.9254587155963303, "eval_loss": 2.8218579292297363, "eval_runtime": 30.6417, "eval_samples_per_second": 28.458, "eval_steps_per_second": 3.557, "step": 2250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010190611001746603, "compression/movement_sparsity/importance_threshold": -0.0021290799871496345, "compression/movement_sparsity/linear_layer_sparsity": 0.010168882678410117, "compression/movement_sparsity/model_sparsity": 0.007896457500763263, "compression_loss": 2.7714102268218994, "distillation_loss": 0.1411275863647461, "epoch": 1.07, "learning_rate": 1.5711163895486937e-05, "loss": 2.8369, "step": 2260, "task_loss": 0.06266731023788452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.010799617022388065, "compression/movement_sparsity/importance_threshold": -0.0020965092151978655, "compression/movement_sparsity/linear_layer_sparsity": 0.011874576558265583, "compression/movement_sparsity/model_sparsity": 0.009220982491123023, "compression_loss": 2.936958074569702, "distillation_loss": 0.32265156507492065, "epoch": 1.08, "learning_rate": 1.5692161520190026e-05, "loss": 3.0225, "step": 2270, "task_loss": 0.4202197194099426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01140238008393124, "compression/movement_sparsity/importance_threshold": -0.0020642723282820446, "compression/movement_sparsity/linear_layer_sparsity": 0.013452696853357423, "compression/movement_sparsity/model_sparsity": 0.010446442577091193, "compression_loss": 3.100813865661621, "distillation_loss": 0.03656826913356781, "epoch": 1.08, "learning_rate": 1.5673159144893113e-05, "loss": 3.1307, "step": 2280, "task_loss": 0.16597847640514374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.011998932349998482, "compression/movement_sparsity/importance_threshold": -0.002032367606231971, "compression/movement_sparsity/linear_layer_sparsity": 0.015547780694820837, "compression/movement_sparsity/model_sparsity": 0.01207334113004394, "compression_loss": 3.262972354888916, "distillation_loss": 0.3931387662887573, "epoch": 1.09, "learning_rate": 1.5654156769596202e-05, "loss": 3.4253, "step": 2290, "task_loss": 0.39384859800338745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.012589305984212141, "compression/movement_sparsity/importance_threshold": -0.0020007933288774442, "compression/movement_sparsity/linear_layer_sparsity": 0.018422667777024993, "compression/movement_sparsity/model_sparsity": 0.01430578144645319, "compression_loss": 3.4234325885772705, "distillation_loss": 0.05559838190674782, "epoch": 1.09, "learning_rate": 1.5635154394299288e-05, "loss": 3.4753, "step": 2300, "task_loss": 0.011821478605270386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.013173533150194566, "compression/movement_sparsity/importance_threshold": -0.0019695477760482637, "compression/movement_sparsity/linear_layer_sparsity": 0.02154230418172237, "compression/movement_sparsity/model_sparsity": 0.01672827731611527, "compression_loss": 3.5822105407714844, "distillation_loss": 0.10458941757678986, "epoch": 1.1, "learning_rate": 1.5616152019002378e-05, "loss": 3.7453, "step": 2310, "task_loss": 0.33254674077033997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.013751646011568098, "compression/movement_sparsity/importance_threshold": -0.001938629227574229, "compression/movement_sparsity/linear_layer_sparsity": 0.025347445705359833, "compression/movement_sparsity/model_sparsity": 0.019683089489294023, "compression_loss": 3.739313840866089, "distillation_loss": 0.07036435604095459, "epoch": 1.1, "learning_rate": 1.5597149643705464e-05, "loss": 3.7802, "step": 2320, "task_loss": 0.042275626212358475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01432367673195511, "compression/movement_sparsity/importance_threshold": -0.0019080359632851387, "compression/movement_sparsity/linear_layer_sparsity": 0.02890405045543511, "compression/movement_sparsity/model_sparsity": 0.022444905034241694, "compression_loss": 3.894759178161621, "distillation_loss": 0.06921583414077759, "epoch": 1.11, "learning_rate": 1.5578147268408554e-05, "loss": 3.984, "step": 2330, "task_loss": 0.2081681489944458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.014889657474977936, "compression/movement_sparsity/importance_threshold": -0.0018777662630107927, "compression/movement_sparsity/linear_layer_sparsity": 0.03373462671258657, "compression/movement_sparsity/model_sparsity": 0.026195999557121603, "compression_loss": 4.048542022705078, "distillation_loss": 0.021231140941381454, "epoch": 1.11, "learning_rate": 1.555914489311164e-05, "loss": 4.1113, "step": 2340, "task_loss": 0.00398905947804451 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015449620404258941, "compression/movement_sparsity/importance_threshold": -0.0018478184065809896, "compression/movement_sparsity/linear_layer_sparsity": 0.04030603075127973, "compression/movement_sparsity/model_sparsity": 0.03129890165098249, "compression_loss": 4.2006354331970215, "distillation_loss": 0.024290261790156364, "epoch": 1.12, "learning_rate": 1.554014251781473e-05, "loss": 4.25, "step": 2350, "task_loss": 0.005856834352016449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.016003597683420464, "compression/movement_sparsity/importance_threshold": -0.0018181906738255296, "compression/movement_sparsity/linear_layer_sparsity": 0.04683566696778079, "compression/movement_sparsity/model_sparsity": 0.036369369716123603, "compression_loss": 4.351089954376221, "distillation_loss": 0.03239811956882477, "epoch": 1.12, "learning_rate": 1.5521140142517815e-05, "loss": 4.4352, "step": 2360, "task_loss": 0.012328799813985825 }, { "compression/movement_sparsity/importance_regularization_factor": 0.016551621476084855, "compression/movement_sparsity/importance_threshold": -0.0017888813445742116, "compression/movement_sparsity/linear_layer_sparsity": 0.05416729006699789, "compression/movement_sparsity/model_sparsity": 0.0420626058410224, "compression_loss": 4.499930381774902, "distillation_loss": 0.12336177378892899, "epoch": 1.13, "learning_rate": 1.5502137767220905e-05, "loss": 4.5803, "step": 2370, "task_loss": 0.06430968642234802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.017093723945874474, "compression/movement_sparsity/importance_threshold": -0.0017598886986568353, "compression/movement_sparsity/linear_layer_sparsity": 0.06110632386705812, "compression/movement_sparsity/model_sparsity": 0.04745098401701118, "compression_loss": 4.647174835205078, "distillation_loss": 0.030381930992007256, "epoch": 1.13, "learning_rate": 1.548313539192399e-05, "loss": 4.7628, "step": 2380, "task_loss": 0.1719023883342743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01762993725641166, "compression/movement_sparsity/importance_threshold": -0.0017312110159031996, "compression/movement_sparsity/linear_layer_sparsity": 0.06815575833709726, "compression/movement_sparsity/model_sparsity": 0.05292509178848395, "compression_loss": 4.792819023132324, "distillation_loss": 0.44239741563796997, "epoch": 1.14, "learning_rate": 1.5464133016627077e-05, "loss": 4.888, "step": 2390, "task_loss": 0.2511236369609833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.018160293571318775, "compression/movement_sparsity/importance_threshold": -0.0017028465761431042, "compression/movement_sparsity/linear_layer_sparsity": 0.07490678400707618, "compression/movement_sparsity/model_sparsity": 0.058167475733254294, "compression_loss": 4.936878681182861, "distillation_loss": 0.07934065163135529, "epoch": 1.14, "learning_rate": 1.5445130641330167e-05, "loss": 5.0129, "step": 2400, "task_loss": 0.19744229316711426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.018684825054218146, "compression/movement_sparsity/importance_threshold": -0.0016747936592063492, "compression/movement_sparsity/linear_layer_sparsity": 0.0832007019722975, "compression/movement_sparsity/model_sparsity": 0.06460796411318578, "compression_loss": 5.079326152801514, "distillation_loss": 0.08092580735683441, "epoch": 1.14, "learning_rate": 1.5426128266033256e-05, "loss": 5.1946, "step": 2410, "task_loss": 0.020919568836688995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.019203563868732162, "compression/movement_sparsity/importance_threshold": -0.001647050544922732, "compression/movement_sparsity/linear_layer_sparsity": 0.09128394591237579, "compression/movement_sparsity/model_sparsity": 0.07088485748089549, "compression_loss": 5.220214366912842, "distillation_loss": 0.1532871127128601, "epoch": 1.15, "learning_rate": 1.5407125890736343e-05, "loss": 5.4019, "step": 2420, "task_loss": 0.10705733299255371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.01971654217848315, "compression/movement_sparsity/importance_threshold": -0.0016196155131220531, "compression/movement_sparsity/linear_layer_sparsity": 0.09979940624059018, "compression/movement_sparsity/model_sparsity": 0.07749738047950812, "compression_loss": 5.3595380783081055, "distillation_loss": 0.08937288820743561, "epoch": 1.15, "learning_rate": 1.538812351543943e-05, "loss": 5.5179, "step": 2430, "task_loss": 0.2294204831123352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.020223792147093467, "compression/movement_sparsity/importance_threshold": -0.0015924868436341117, "compression/movement_sparsity/linear_layer_sparsity": 0.10875641043736826, "compression/movement_sparsity/model_sparsity": 0.08445277619119063, "compression_loss": 5.49728536605835, "distillation_loss": 0.039297595620155334, "epoch": 1.16, "learning_rate": 1.536912114014252e-05, "loss": 5.5902, "step": 2440, "task_loss": 0.012466028332710266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.020725345938185466, "compression/movement_sparsity/importance_threshold": -0.0015656628162887072, "compression/movement_sparsity/linear_layer_sparsity": 0.11579635275519422, "compression/movement_sparsity/model_sparsity": 0.08991951300767133, "compression_loss": 5.633523941040039, "distillation_loss": 0.271115243434906, "epoch": 1.16, "learning_rate": 1.5350118764845608e-05, "loss": 5.8822, "step": 2450, "task_loss": 0.06959662586450577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02122123571538147, "compression/movement_sparsity/importance_threshold": -0.0015391417109156397, "compression/movement_sparsity/linear_layer_sparsity": 0.1232250616342969, "compression/movement_sparsity/model_sparsity": 0.09568813929676413, "compression_loss": 5.76822566986084, "distillation_loss": 0.6192089319229126, "epoch": 1.17, "learning_rate": 1.5331116389548694e-05, "loss": 5.9777, "step": 2460, "task_loss": 0.36697810888290405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.021711493642303875, "compression/movement_sparsity/importance_threshold": -0.001512921807344707, "compression/movement_sparsity/linear_layer_sparsity": 0.1321434620596206, "compression/movement_sparsity/model_sparsity": 0.10261355796472424, "compression_loss": 5.90135383605957, "distillation_loss": 0.33043575286865234, "epoch": 1.17, "learning_rate": 1.5312114014251784e-05, "loss": 6.1168, "step": 2470, "task_loss": 0.23188000917434692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022196151882575007, "compression/movement_sparsity/importance_threshold": -0.0014870013854057092, "compression/movement_sparsity/linear_layer_sparsity": 0.14109207975760313, "compression/movement_sparsity/model_sparsity": 0.10956244129609777, "compression_loss": 6.032968044281006, "distillation_loss": 0.44384852051734924, "epoch": 1.18, "learning_rate": 1.5293111638954873e-05, "loss": 6.2156, "step": 2480, "task_loss": 0.26120489835739136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022675242599817222, "compression/movement_sparsity/importance_threshold": -0.0014613787249284456, "compression/movement_sparsity/linear_layer_sparsity": 0.14897257744278833, "compression/movement_sparsity/model_sparsity": 0.11568189581473888, "compression_loss": 6.16309928894043, "distillation_loss": 0.13425683975219727, "epoch": 1.18, "learning_rate": 1.527410926365796e-05, "loss": 6.4394, "step": 2490, "task_loss": 0.04769594222307205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02314879795765287, "compression/movement_sparsity/importance_threshold": -0.0014360521057427153, "compression/movement_sparsity/linear_layer_sparsity": 0.1572927958446251, "compression/movement_sparsity/model_sparsity": 0.12214280731160009, "compression_loss": 6.291727542877197, "distillation_loss": 0.3531065583229065, "epoch": 1.19, "learning_rate": 1.5255106888361047e-05, "loss": 6.4768, "step": 2500, "task_loss": 0.4275900721549988 }, { "epoch": 1.19, "eval_accuracy": 0.8979357798165137, "eval_loss": 6.5765380859375, "eval_runtime": 26.114, "eval_samples_per_second": 33.392, "eval_steps_per_second": 4.174, "step": 2500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.023616850119704293, "compression/movement_sparsity/importance_threshold": -0.0014110198076783182, "compression/movement_sparsity/linear_layer_sparsity": 0.16596552949036436, "compression/movement_sparsity/model_sparsity": 0.12887745799199585, "compression_loss": 6.418839454650879, "distillation_loss": 0.32202211022377014, "epoch": 1.19, "learning_rate": 1.5236104513064133e-05, "loss": 6.6039, "step": 2510, "task_loss": 0.21520735323429108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.024079431249593854, "compression/movement_sparsity/importance_threshold": -0.0013862801105650534, "compression/movement_sparsity/linear_layer_sparsity": 0.17616387665612768, "compression/movement_sparsity/model_sparsity": 0.1367967955946862, "compression_loss": 6.544415473937988, "distillation_loss": 0.24521012604236603, "epoch": 1.2, "learning_rate": 1.5217102137767221e-05, "loss": 6.6944, "step": 2520, "task_loss": 0.1956482082605362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0245365735109439, "compression/movement_sparsity/importance_threshold": -0.00136183129423272, "compression/movement_sparsity/linear_layer_sparsity": 0.18661719455736225, "compression/movement_sparsity/model_sparsity": 0.14491412599956133, "compression_loss": 6.668520450592041, "distillation_loss": 0.3077329397201538, "epoch": 1.2, "learning_rate": 1.519809976247031e-05, "loss": 6.8466, "step": 2530, "task_loss": 0.16484826803207397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02498830906737678, "compression/movement_sparsity/importance_threshold": -0.0013376716385111176, "compression/movement_sparsity/linear_layer_sparsity": 0.19720587266636555, "compression/movement_sparsity/model_sparsity": 0.1531365678667026, "compression_loss": 6.791137218475342, "distillation_loss": 0.15260806679725647, "epoch": 1.21, "learning_rate": 1.5179097387173399e-05, "loss": 6.9378, "step": 2540, "task_loss": 0.04942808300256729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025434670082514835, "compression/movement_sparsity/importance_threshold": -0.001313799423230046, "compression/movement_sparsity/linear_layer_sparsity": 0.20696387571514605, "compression/movement_sparsity/model_sparsity": 0.16071396440119193, "compression_loss": 6.912316799163818, "distillation_loss": 0.1831030249595642, "epoch": 1.21, "learning_rate": 1.5160095011876485e-05, "loss": 7.1445, "step": 2550, "task_loss": 0.0789690762758255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.025875688719980434, "compression/movement_sparsity/importance_threshold": -0.0012902129282193035, "compression/movement_sparsity/linear_layer_sparsity": 0.21675474254742547, "compression/movement_sparsity/model_sparsity": 0.1683168806980702, "compression_loss": 7.032046794891357, "distillation_loss": 0.14177289605140686, "epoch": 1.22, "learning_rate": 1.5141092636579573e-05, "loss": 7.2325, "step": 2560, "task_loss": 0.2033880352973938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02631139714339592, "compression/movement_sparsity/importance_threshold": -0.00126691043330869, "compression/movement_sparsity/linear_layer_sparsity": 0.22645565153568203, "compression/movement_sparsity/model_sparsity": 0.1758499418973284, "compression_loss": 7.150337219238281, "distillation_loss": 0.21663016080856323, "epoch": 1.22, "learning_rate": 1.5122090261282662e-05, "loss": 7.3976, "step": 2570, "task_loss": 0.18229436874389648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02674182751638365, "compression/movement_sparsity/importance_threshold": -0.0012438902183280046, "compression/movement_sparsity/linear_layer_sparsity": 0.2362620798517013, "compression/movement_sparsity/model_sparsity": 0.18346494217618248, "compression_loss": 7.267183780670166, "distillation_loss": 0.062164291739463806, "epoch": 1.23, "learning_rate": 1.510308788598575e-05, "loss": 7.4626, "step": 2580, "task_loss": 0.006800137460231781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.027167012002565962, "compression/movement_sparsity/importance_threshold": -0.0012211505631070472, "compression/movement_sparsity/linear_layer_sparsity": 0.24733038806082505, "compression/movement_sparsity/model_sparsity": 0.19205983191409415, "compression_loss": 7.382561683654785, "distillation_loss": 0.05586311221122742, "epoch": 1.23, "learning_rate": 1.5084085510688838e-05, "loss": 7.5546, "step": 2590, "task_loss": 0.01366850733757019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.027586982765565204, "compression/movement_sparsity/importance_threshold": -0.001198689747475617, "compression/movement_sparsity/linear_layer_sparsity": 0.2569306825880759, "compression/movement_sparsity/model_sparsity": 0.19951476281719105, "compression_loss": 7.496555805206299, "distillation_loss": 0.6351262331008911, "epoch": 1.24, "learning_rate": 1.5065083135391924e-05, "loss": 7.888, "step": 2600, "task_loss": 0.46579158306121826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.028001771969003754, "compression/movement_sparsity/importance_threshold": -0.0011765060512635124, "compression/movement_sparsity/linear_layer_sparsity": 0.2665302008054803, "compression/movement_sparsity/model_sparsity": 0.20696909089125515, "compression_loss": 7.60915994644165, "distillation_loss": 0.42842957377433777, "epoch": 1.24, "learning_rate": 1.5046080760095012e-05, "loss": 7.9082, "step": 2610, "task_loss": 0.14085018634796143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02841141177650394, "compression/movement_sparsity/importance_threshold": -0.0011545977543005338, "compression/movement_sparsity/linear_layer_sparsity": 0.2762365557249323, "compression/movement_sparsity/model_sparsity": 0.21450638102751624, "compression_loss": 7.720366954803467, "distillation_loss": 0.3830278515815735, "epoch": 1.24, "learning_rate": 1.5027078384798102e-05, "loss": 8.14, "step": 2620, "task_loss": 0.28632909059524536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.028815934351688118, "compression/movement_sparsity/importance_threshold": -0.00113296313641648, "compression/movement_sparsity/linear_layer_sparsity": 0.285795494109455, "compression/movement_sparsity/model_sparsity": 0.22192919758395624, "compression_loss": 7.830181121826172, "distillation_loss": 0.2521060109138489, "epoch": 1.25, "learning_rate": 1.500807600950119e-05, "loss": 8.1153, "step": 2630, "task_loss": 0.1563887596130371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.029215371858178636, "compression/movement_sparsity/importance_threshold": -0.0011116004774411505, "compression/movement_sparsity/linear_layer_sparsity": 0.2953732638888889, "compression/movement_sparsity/model_sparsity": 0.22936663731132845, "compression_loss": 7.938580513000488, "distillation_loss": 0.1190139651298523, "epoch": 1.25, "learning_rate": 1.4989073634204276e-05, "loss": 8.1567, "step": 2640, "task_loss": 0.04846365749835968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.029609756459597847, "compression/movement_sparsity/importance_threshold": -0.0010905080572043448, "compression/movement_sparsity/linear_layer_sparsity": 0.30575512599744054, "compression/movement_sparsity/model_sparsity": 0.2374284800438655, "compression_loss": 8.045588493347168, "distillation_loss": 0.06826656311750412, "epoch": 1.26, "learning_rate": 1.4970071258907363e-05, "loss": 8.2518, "step": 2650, "task_loss": 0.011266250163316727 }, { "compression/movement_sparsity/importance_regularization_factor": 0.02999912031956811, "compression/movement_sparsity/importance_threshold": -0.001069684155535862, "compression/movement_sparsity/linear_layer_sparsity": 0.31633204183604335, "compression/movement_sparsity/model_sparsity": 0.2456417881377824, "compression_loss": 8.151252746582031, "distillation_loss": 0.1792048215866089, "epoch": 1.26, "learning_rate": 1.4951068883610453e-05, "loss": 8.4222, "step": 2660, "task_loss": 0.06012868136167526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03038349560171176, "compression/movement_sparsity/importance_threshold": -0.0010491270522655018, "compression/movement_sparsity/linear_layer_sparsity": 0.32636890479147845, "compression/movement_sparsity/model_sparsity": 0.25343572816787524, "compression_loss": 8.255572319030762, "distillation_loss": 0.0919712483882904, "epoch": 1.27, "learning_rate": 1.4932066508313541e-05, "loss": 8.513, "step": 2670, "task_loss": 0.017085224390029907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.030762914469651154, "compression/movement_sparsity/importance_threshold": -0.001028835027223063, "compression/movement_sparsity/linear_layer_sparsity": 0.3357471276535682, "compression/movement_sparsity/model_sparsity": 0.2607182134324959, "compression_loss": 8.358521461486816, "distillation_loss": 0.5716733336448669, "epoch": 1.27, "learning_rate": 1.4913064133016629e-05, "loss": 8.648, "step": 2680, "task_loss": 0.34179773926734924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03113740908700865, "compression/movement_sparsity/importance_threshold": -0.0010088063602383453, "compression/movement_sparsity/linear_layer_sparsity": 0.3457659002371274, "compression/movement_sparsity/model_sparsity": 0.26849810571936966, "compression_loss": 8.460137367248535, "distillation_loss": 0.3870583772659302, "epoch": 1.28, "learning_rate": 1.4894061757719715e-05, "loss": 8.7722, "step": 2690, "task_loss": 0.2868567109107971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03150701161740658, "compression/movement_sparsity/importance_threshold": -0.0009890393311411487, "compression/movement_sparsity/linear_layer_sparsity": 0.35571594587473654, "compression/movement_sparsity/model_sparsity": 0.27622462936929343, "compression_loss": 8.560412406921387, "distillation_loss": 0.5741435289382935, "epoch": 1.28, "learning_rate": 1.4875059382422804e-05, "loss": 8.8839, "step": 2700, "task_loss": 0.5338079333305359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03187175422446732, "compression/movement_sparsity/importance_threshold": -0.000969532219761271, "compression/movement_sparsity/linear_layer_sparsity": 0.36540933877220716, "compression/movement_sparsity/model_sparsity": 0.28375185408746123, "compression_loss": 8.659358024597168, "distillation_loss": 0.5666482448577881, "epoch": 1.29, "learning_rate": 1.4856057007125892e-05, "loss": 9.0379, "step": 2710, "task_loss": 0.368877112865448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.032231669071813206, "compression/movement_sparsity/importance_threshold": -0.0009502833059285125, "compression/movement_sparsity/linear_layer_sparsity": 0.3747603790274014, "compression/movement_sparsity/model_sparsity": 0.2910132312021604, "compression_loss": 8.756999015808105, "distillation_loss": 0.4417470097541809, "epoch": 1.29, "learning_rate": 1.483705463182898e-05, "loss": 9.134, "step": 2720, "task_loss": 0.2064928412437439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.032586788323066586, "compression/movement_sparsity/importance_threshold": -0.0009312908694726729, "compression/movement_sparsity/linear_layer_sparsity": 0.38400508835817526, "compression/movement_sparsity/model_sparsity": 0.29819203900691116, "compression_loss": 8.85331916809082, "distillation_loss": 0.38386473059654236, "epoch": 1.3, "learning_rate": 1.4818052256532068e-05, "loss": 9.1598, "step": 2730, "task_loss": 0.21585097908973694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03293714414184983, "compression/movement_sparsity/importance_threshold": -0.00091255319022355, "compression/movement_sparsity/linear_layer_sparsity": 0.39283086090409514, "compression/movement_sparsity/model_sparsity": 0.3050455292107293, "compression_loss": 8.948369026184082, "distillation_loss": 0.3611488938331604, "epoch": 1.3, "learning_rate": 1.4799049881235154e-05, "loss": 9.3696, "step": 2740, "task_loss": 0.24724030494689941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.033282768691785265, "compression/movement_sparsity/importance_threshold": -0.0008940685480109448, "compression/movement_sparsity/linear_layer_sparsity": 0.4017413100346281, "compression/movement_sparsity/model_sparsity": 0.31196477344798973, "compression_loss": 9.04212474822998, "distillation_loss": 0.436295747756958, "epoch": 1.31, "learning_rate": 1.4780047505938244e-05, "loss": 9.3594, "step": 2750, "task_loss": 0.18571683764457703 }, { "epoch": 1.31, "eval_accuracy": 0.8818807339449541, "eval_loss": 9.464767456054688, "eval_runtime": 25.44, "eval_samples_per_second": 34.277, "eval_steps_per_second": 4.285, "step": 2750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.033623694136495255, "compression/movement_sparsity/importance_threshold": -0.000875835222664656, "compression/movement_sparsity/linear_layer_sparsity": 0.4111423987503764, "compression/movement_sparsity/model_sparsity": 0.31926501476775854, "compression_loss": 9.134596824645996, "distillation_loss": 0.541549563407898, "epoch": 1.31, "learning_rate": 1.4761045130641332e-05, "loss": 9.4946, "step": 2760, "task_loss": 0.27293768525123596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03395995263960215, "compression/movement_sparsity/importance_threshold": -0.0008578514940144827, "compression/movement_sparsity/linear_layer_sparsity": 0.4209716693955134, "compression/movement_sparsity/model_sparsity": 0.3268977528342144, "compression_loss": 9.225789070129395, "distillation_loss": 0.18441905081272125, "epoch": 1.32, "learning_rate": 1.474204275534442e-05, "loss": 9.5998, "step": 2770, "task_loss": 0.05545267462730408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03429157636472829, "compression/movement_sparsity/importance_threshold": -0.0008401156418902246, "compression/movement_sparsity/linear_layer_sparsity": 0.4294070404245709, "compression/movement_sparsity/model_sparsity": 0.3334480839709423, "compression_loss": 9.31571102142334, "distillation_loss": 0.2552655339241028, "epoch": 1.32, "learning_rate": 1.4723040380047506e-05, "loss": 9.5848, "step": 2780, "task_loss": 0.07465029507875443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03461859747549605, "compression/movement_sparsity/importance_threshold": -0.0008226259461216808, "compression/movement_sparsity/linear_layer_sparsity": 0.43871413683754895, "compression/movement_sparsity/model_sparsity": 0.3406753373088751, "compression_loss": 9.404358863830566, "distillation_loss": 0.5486918687820435, "epoch": 1.33, "learning_rate": 1.4704038004750595e-05, "loss": 9.6934, "step": 2790, "task_loss": 0.25401195883750916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03494104813552775, "compression/movement_sparsity/importance_threshold": -0.0008053806865386509, "compression/movement_sparsity/linear_layer_sparsity": 0.44789961137458595, "compression/movement_sparsity/model_sparsity": 0.34780814743166794, "compression_loss": 9.491792678833008, "distillation_loss": 0.7868499755859375, "epoch": 1.33, "learning_rate": 1.4686935866983374e-05, "loss": 9.9283, "step": 2800, "task_loss": 0.47319698333740234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03525896050844576, "compression/movement_sparsity/importance_threshold": -0.0007883781429709343, "compression/movement_sparsity/linear_layer_sparsity": 0.4564576275971093, "compression/movement_sparsity/model_sparsity": 0.35445371642180634, "compression_loss": 9.577984809875488, "distillation_loss": 0.3035008907318115, "epoch": 1.33, "learning_rate": 1.4667933491686462e-05, "loss": 9.8715, "step": 2810, "task_loss": 0.44653627276420593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03557236675787243, "compression/movement_sparsity/importance_threshold": -0.0007716165952483297, "compression/movement_sparsity/linear_layer_sparsity": 0.4642448975271003, "compression/movement_sparsity/model_sparsity": 0.36050077665387104, "compression_loss": 9.66294002532959, "distillation_loss": 0.10570189356803894, "epoch": 1.34, "learning_rate": 1.4648931116389552e-05, "loss": 10.013, "step": 2820, "task_loss": 0.022750303149223328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03588129904743011, "compression/movement_sparsity/importance_threshold": -0.0007550943232006373, "compression/movement_sparsity/linear_layer_sparsity": 0.4726805626129178, "compression/movement_sparsity/model_sparsity": 0.3670513361349295, "compression_loss": 9.746687889099121, "distillation_loss": 0.5314592123031616, "epoch": 1.34, "learning_rate": 1.4629928741092638e-05, "loss": 10.1699, "step": 2830, "task_loss": 0.5152712464332581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.036185789540741135, "compression/movement_sparsity/importance_threshold": -0.0007388096066576562, "compression/movement_sparsity/linear_layer_sparsity": 0.48106820234869013, "compression/movement_sparsity/model_sparsity": 0.37356460241991285, "compression_loss": 9.829238891601562, "distillation_loss": 0.6711559295654297, "epoch": 1.35, "learning_rate": 1.4610926365795726e-05, "loss": 10.3624, "step": 2840, "task_loss": 0.32968375086784363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.036485870401427874, "compression/movement_sparsity/importance_threshold": -0.0007227607254491853, "compression/movement_sparsity/linear_layer_sparsity": 0.4889538048592291, "compression/movement_sparsity/model_sparsity": 0.3796880209961333, "compression_loss": 9.910573959350586, "distillation_loss": 0.2346329241991043, "epoch": 1.35, "learning_rate": 1.4591923990498813e-05, "loss": 10.5236, "step": 2850, "task_loss": 0.04817202687263489 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03678157379311268, "compression/movement_sparsity/importance_threshold": -0.0007069459594050238, "compression/movement_sparsity/linear_layer_sparsity": 0.4977513832429991, "compression/movement_sparsity/model_sparsity": 0.38651961754553266, "compression_loss": 9.990703582763672, "distillation_loss": 0.2503522038459778, "epoch": 1.36, "learning_rate": 1.45729216152019e-05, "loss": 10.4097, "step": 2860, "task_loss": 0.17465853691101074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03707293187941788, "compression/movement_sparsity/importance_threshold": -0.0006913635883549723, "compression/movement_sparsity/linear_layer_sparsity": 0.5060777297877146, "compression/movement_sparsity/model_sparsity": 0.39298528773824376, "compression_loss": 10.069620132446289, "distillation_loss": 0.6797527074813843, "epoch": 1.36, "learning_rate": 1.455391923990499e-05, "loss": 10.5071, "step": 2870, "task_loss": 0.306240975856781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03735997682396585, "compression/movement_sparsity/importance_threshold": -0.0006760118921288289, "compression/movement_sparsity/linear_layer_sparsity": 0.5141535282106293, "compression/movement_sparsity/model_sparsity": 0.3992563994275024, "compression_loss": 10.147372245788574, "distillation_loss": 0.5875498056411743, "epoch": 1.37, "learning_rate": 1.4534916864608077e-05, "loss": 10.567, "step": 2880, "task_loss": 0.4646558165550232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03764274079037894, "compression/movement_sparsity/importance_threshold": -0.0006608891505563933, "compression/movement_sparsity/linear_layer_sparsity": 0.5215901883845228, "compression/movement_sparsity/model_sparsity": 0.4050312001472949, "compression_loss": 10.223993301391602, "distillation_loss": 0.8365118503570557, "epoch": 1.37, "learning_rate": 1.4515914489311165e-05, "loss": 10.7409, "step": 2890, "task_loss": 0.357979416847229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03792125594227949, "compression/movement_sparsity/importance_threshold": -0.0006459936434674647, "compression/movement_sparsity/linear_layer_sparsity": 0.5287817816546221, "compression/movement_sparsity/model_sparsity": 0.41061569870195747, "compression_loss": 10.299442291259766, "distillation_loss": 0.34872639179229736, "epoch": 1.38, "learning_rate": 1.4496912114014253e-05, "loss": 10.6705, "step": 2900, "task_loss": 0.3244781494140625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03819555444328984, "compression/movement_sparsity/importance_threshold": -0.0006313236506918429, "compression/movement_sparsity/linear_layer_sparsity": 0.5359220914257754, "compression/movement_sparsity/model_sparsity": 0.41616037400536177, "compression_loss": 10.373762130737305, "distillation_loss": 0.966638445854187, "epoch": 1.38, "learning_rate": 1.4477909738717342e-05, "loss": 10.9055, "step": 2910, "task_loss": 0.6029879450798035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03846566845703237, "compression/movement_sparsity/importance_threshold": -0.0006168774520593267, "compression/movement_sparsity/linear_layer_sparsity": 0.5422028027137911, "compression/movement_sparsity/model_sparsity": 0.4210375440277554, "compression_loss": 10.446945190429688, "distillation_loss": 0.1921069324016571, "epoch": 1.39, "learning_rate": 1.4458907363420428e-05, "loss": 10.8335, "step": 2920, "task_loss": 0.06273065507411957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03873163014712941, "compression/movement_sparsity/importance_threshold": -0.0006026533273997157, "compression/movement_sparsity/linear_layer_sparsity": 0.5490401163994278, "compression/movement_sparsity/model_sparsity": 0.4263469333328998, "compression_loss": 10.518943786621094, "distillation_loss": 0.16988125443458557, "epoch": 1.39, "learning_rate": 1.4439904988123516e-05, "loss": 10.808, "step": 2930, "task_loss": 0.1513996422290802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03899347167720332, "compression/movement_sparsity/importance_threshold": -0.0005886495565428094, "compression/movement_sparsity/linear_layer_sparsity": 0.5570446472259861, "compression/movement_sparsity/model_sparsity": 0.43256270349019194, "compression_loss": 10.589797973632812, "distillation_loss": 0.29276514053344727, "epoch": 1.4, "learning_rate": 1.4420902612826604e-05, "loss": 11.0576, "step": 2940, "task_loss": 0.39499735832214355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03925122521087644, "compression/movement_sparsity/importance_threshold": -0.0005748644193184068, "compression/movement_sparsity/linear_layer_sparsity": 0.5645892144685336, "compression/movement_sparsity/model_sparsity": 0.43842129744554487, "compression_loss": 10.659571647644043, "distillation_loss": 0.4497772753238678, "epoch": 1.4, "learning_rate": 1.4401900237529694e-05, "loss": 11.046, "step": 2950, "task_loss": 0.18338404595851898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03950492291177113, "compression/movement_sparsity/importance_threshold": -0.0005612961955563074, "compression/movement_sparsity/linear_layer_sparsity": 0.5716300271943692, "compression/movement_sparsity/model_sparsity": 0.44388871016124415, "compression_loss": 10.72823429107666, "distillation_loss": 0.9414302110671997, "epoch": 1.41, "learning_rate": 1.4382897862232782e-05, "loss": 11.2474, "step": 2960, "task_loss": 0.3824530839920044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03975459694350974, "compression/movement_sparsity/importance_threshold": -0.0005479431650863105, "compression/movement_sparsity/linear_layer_sparsity": 0.5780708112202649, "compression/movement_sparsity/model_sparsity": 0.44889018170344835, "compression_loss": 10.795801162719727, "distillation_loss": 0.5067998170852661, "epoch": 1.41, "learning_rate": 1.4363895486935868e-05, "loss": 11.2138, "step": 2970, "task_loss": 0.17481166124343872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04000027946971461, "compression/movement_sparsity/importance_threshold": -0.000534803607738216, "compression/movement_sparsity/linear_layer_sparsity": 0.5847961833785005, "compression/movement_sparsity/model_sparsity": 0.4541126448888163, "compression_loss": 10.862251281738281, "distillation_loss": 0.47052228450775146, "epoch": 1.42, "learning_rate": 1.4344893111638956e-05, "loss": 11.3225, "step": 2980, "task_loss": 0.4483351707458496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.040242002654008104, "compression/movement_sparsity/importance_threshold": -0.0005218758033418225, "compression/movement_sparsity/linear_layer_sparsity": 0.5909785738482385, "compression/movement_sparsity/model_sparsity": 0.45891346570082736, "compression_loss": 10.92764663696289, "distillation_loss": 0.43901222944259644, "epoch": 1.42, "learning_rate": 1.4325890736342044e-05, "loss": 11.3835, "step": 2990, "task_loss": 0.24378883838653564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04047979866001257, "compression/movement_sparsity/importance_threshold": -0.0005091580317269292, "compression/movement_sparsity/linear_layer_sparsity": 0.5969155445460704, "compression/movement_sparsity/model_sparsity": 0.46352371033451184, "compression_loss": 10.991974830627441, "distillation_loss": 0.6936439275741577, "epoch": 1.43, "learning_rate": 1.4306888361045133e-05, "loss": 11.5481, "step": 3000, "task_loss": 0.26209497451782227 }, { "epoch": 1.43, "eval_accuracy": 0.856651376146789, "eval_loss": 11.539105415344238, "eval_runtime": 24.425, "eval_samples_per_second": 35.701, "eval_steps_per_second": 4.463, "step": 3000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.040713699651350355, "compression/movement_sparsity/importance_threshold": -0.0004966485727233363, "compression/movement_sparsity/linear_layer_sparsity": 0.6025890286246612, "compression/movement_sparsity/model_sparsity": 0.46792934931419705, "compression_loss": 11.055262565612793, "distillation_loss": 0.39871829748153687, "epoch": 1.43, "learning_rate": 1.428788598574822e-05, "loss": 11.5298, "step": 3010, "task_loss": 0.16510379314422607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.040943737791643814, "compression/movement_sparsity/importance_threshold": -0.00048434570616084265, "compression/movement_sparsity/linear_layer_sparsity": 0.6088090701219512, "compression/movement_sparsity/model_sparsity": 0.47275940733429933, "compression_loss": 11.11749267578125, "distillation_loss": 0.4512562155723572, "epoch": 1.43, "learning_rate": 1.4268883610451307e-05, "loss": 11.6419, "step": 3020, "task_loss": 0.2681746184825897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.041169945244515296, "compression/movement_sparsity/importance_threshold": -0.000472247711869247, "compression/movement_sparsity/linear_layer_sparsity": 0.6145064551339958, "compression/movement_sparsity/model_sparsity": 0.47718360614117644, "compression_loss": 11.178674697875977, "distillation_loss": 0.33271324634552, "epoch": 1.44, "learning_rate": 1.4249881235154395e-05, "loss": 11.726, "step": 3030, "task_loss": 0.2389669120311737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04139235417358715, "compression/movement_sparsity/importance_threshold": -0.00046035286967835, "compression/movement_sparsity/linear_layer_sparsity": 0.6200651276912075, "compression/movement_sparsity/model_sparsity": 0.4815000903604185, "compression_loss": 11.238801002502441, "distillation_loss": 1.2966899871826172, "epoch": 1.44, "learning_rate": 1.4230878859857485e-05, "loss": 11.8034, "step": 3040, "task_loss": 0.7004615068435669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04161099674248173, "compression/movement_sparsity/importance_threshold": -0.00044865945941794955, "compression/movement_sparsity/linear_layer_sparsity": 0.6260913622591087, "compression/movement_sparsity/model_sparsity": 0.4861796511991028, "compression_loss": 11.297928810119629, "distillation_loss": 0.4639695882797241, "epoch": 1.45, "learning_rate": 1.4211876484560572e-05, "loss": 11.7174, "step": 3050, "task_loss": 0.23858779668807983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.041825905114821385, "compression/movement_sparsity/importance_threshold": -0.0004371657609178463, "compression/movement_sparsity/linear_layer_sparsity": 0.630700890168624, "compression/movement_sparsity/model_sparsity": 0.489759094721777, "compression_loss": 11.356048583984375, "distillation_loss": 0.2719360888004303, "epoch": 1.45, "learning_rate": 1.4192874109263659e-05, "loss": 11.7773, "step": 3060, "task_loss": 0.21212808787822723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04203711145422847, "compression/movement_sparsity/importance_threshold": -0.00042587005400783863, "compression/movement_sparsity/linear_layer_sparsity": 0.6357438459801265, "compression/movement_sparsity/model_sparsity": 0.4936751086539962, "compression_loss": 11.413163185119629, "distillation_loss": 0.750543475151062, "epoch": 1.46, "learning_rate": 1.4173871733966746e-05, "loss": 12.0443, "step": 3070, "task_loss": 0.33162063360214233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04224464792432533, "compression/movement_sparsity/importance_threshold": -0.00041477061851772625, "compression/movement_sparsity/linear_layer_sparsity": 0.6405869490552545, "compression/movement_sparsity/model_sparsity": 0.49743593064536007, "compression_loss": 11.469284057617188, "distillation_loss": 0.4959287941455841, "epoch": 1.46, "learning_rate": 1.4154869358669834e-05, "loss": 11.8504, "step": 3080, "task_loss": 0.22281783819198608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04244854668873431, "compression/movement_sparsity/importance_threshold": -0.0004038657342773089, "compression/movement_sparsity/linear_layer_sparsity": 0.6453207100647396, "compression/movement_sparsity/model_sparsity": 0.5011118450808302, "compression_loss": 11.524415969848633, "distillation_loss": 0.7054104804992676, "epoch": 1.47, "learning_rate": 1.4135866983372924e-05, "loss": 12.0627, "step": 3090, "task_loss": 0.45865899324417114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04264883991107778, "compression/movement_sparsity/importance_threshold": -0.0003931536811163849, "compression/movement_sparsity/linear_layer_sparsity": 0.6499866733476363, "compression/movement_sparsity/model_sparsity": 0.5047351124474349, "compression_loss": 11.57857608795166, "distillation_loss": 0.363597571849823, "epoch": 1.47, "learning_rate": 1.411686460807601e-05, "loss": 11.9987, "step": 3100, "task_loss": 0.1337648332118988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.042845559754978065, "compression/movement_sparsity/importance_threshold": -0.0003826327388647549, "compression/movement_sparsity/linear_layer_sparsity": 0.6550466021153267, "compression/movement_sparsity/model_sparsity": 0.508664306414417, "compression_loss": 11.631749153137207, "distillation_loss": 0.42896637320518494, "epoch": 1.48, "learning_rate": 1.4097862232779098e-05, "loss": 12.1107, "step": 3110, "task_loss": 0.39144840836524963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04303873838405753, "compression/movement_sparsity/importance_threshold": -0.0003723011873522173, "compression/movement_sparsity/linear_layer_sparsity": 0.6597050610697079, "compression/movement_sparsity/model_sparsity": 0.5122817464337045, "compression_loss": 11.68390941619873, "distillation_loss": 0.6013063192367554, "epoch": 1.48, "learning_rate": 1.4078859857482186e-05, "loss": 12.2025, "step": 3120, "task_loss": 0.20251774787902832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04322840796193853, "compression/movement_sparsity/importance_threshold": -0.0003621573064085718, "compression/movement_sparsity/linear_layer_sparsity": 0.6641005862315568, "compression/movement_sparsity/model_sparsity": 0.5156950100863344, "compression_loss": 11.735136985778809, "distillation_loss": 0.4356003701686859, "epoch": 1.49, "learning_rate": 1.4059857482185275e-05, "loss": 12.1346, "step": 3130, "task_loss": 0.19159378111362457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.043414600652243424, "compression/movement_sparsity/importance_threshold": -0.00035219937586361723, "compression/movement_sparsity/linear_layer_sparsity": 0.6691960135313159, "compression/movement_sparsity/model_sparsity": 0.5196517697809077, "compression_loss": 11.785431861877441, "distillation_loss": 0.36588340997695923, "epoch": 1.49, "learning_rate": 1.4040855106888363e-05, "loss": 12.2288, "step": 3140, "task_loss": 0.11971582472324371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04359734861859453, "compression/movement_sparsity/importance_threshold": -0.00034242567554715374, "compression/movement_sparsity/linear_layer_sparsity": 0.6736123226249624, "compression/movement_sparsity/model_sparsity": 0.523081172810825, "compression_loss": 11.834785461425781, "distillation_loss": 0.27993667125701904, "epoch": 1.5, "learning_rate": 1.402185273159145e-05, "loss": 12.3277, "step": 3150, "task_loss": 0.10686977207660675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04377668402461424, "compression/movement_sparsity/importance_threshold": -0.00033283448528897974, "compression/movement_sparsity/linear_layer_sparsity": 0.677556011931647, "compression/movement_sparsity/model_sparsity": 0.5261435716988137, "compression_loss": 11.883201599121094, "distillation_loss": 0.22374649345874786, "epoch": 1.5, "learning_rate": 1.4002850356294537e-05, "loss": 12.1793, "step": 3160, "task_loss": 0.11772890388965607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.043952639033924865, "compression/movement_sparsity/importance_threshold": -0.0003234240849188958, "compression/movement_sparsity/linear_layer_sparsity": 0.6809970641373081, "compression/movement_sparsity/model_sparsity": 0.5288156570556051, "compression_loss": 11.930685997009277, "distillation_loss": 1.1999856233596802, "epoch": 1.51, "learning_rate": 1.3983847980997627e-05, "loss": 12.4018, "step": 3170, "task_loss": 0.6132296323776245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04412524581014878, "compression/movement_sparsity/importance_threshold": -0.0003141927542666999, "compression/movement_sparsity/linear_layer_sparsity": 0.6850743257866606, "compression/movement_sparsity/model_sparsity": 0.5319817790723298, "compression_loss": 11.977304458618164, "distillation_loss": 0.48509520292282104, "epoch": 1.51, "learning_rate": 1.3964845605700715e-05, "loss": 12.4075, "step": 3180, "task_loss": 0.3772471845149994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04429453651690833, "compression/movement_sparsity/importance_threshold": -0.0003051387731621926, "compression/movement_sparsity/linear_layer_sparsity": 0.6893989597448058, "compression/movement_sparsity/model_sparsity": 0.5353399934737364, "compression_loss": 12.023012161254883, "distillation_loss": 0.5859512090682983, "epoch": 1.52, "learning_rate": 1.39458432304038e-05, "loss": 12.5605, "step": 3190, "task_loss": 0.14015838503837585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04446054331782587, "compression/movement_sparsity/importance_threshold": -0.00029626042143517236, "compression/movement_sparsity/linear_layer_sparsity": 0.6931833173366456, "compression/movement_sparsity/model_sparsity": 0.5382786662696276, "compression_loss": 12.067791938781738, "distillation_loss": 0.3705664277076721, "epoch": 1.52, "learning_rate": 1.3926840855106889e-05, "loss": 12.4846, "step": 3200, "task_loss": 0.1473858654499054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04462329837652374, "compression/movement_sparsity/importance_threshold": -0.00028755597891543883, "compression/movement_sparsity/linear_layer_sparsity": 0.6960933147959952, "compression/movement_sparsity/model_sparsity": 0.5405383708991116, "compression_loss": 12.111686706542969, "distillation_loss": 0.826543927192688, "epoch": 1.52, "learning_rate": 1.3907838479809977e-05, "loss": 12.5674, "step": 3210, "task_loss": 0.4986266791820526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04478283385662431, "compression/movement_sparsity/importance_threshold": -0.0002790237254327913, "compression/movement_sparsity/linear_layer_sparsity": 0.6998678391297802, "compression/movement_sparsity/model_sparsity": 0.5434694078605874, "compression_loss": 12.154720306396484, "distillation_loss": 0.4464240074157715, "epoch": 1.53, "learning_rate": 1.3888836104513066e-05, "loss": 12.4925, "step": 3220, "task_loss": 0.23257675766944885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04493918192174991, "compression/movement_sparsity/importance_threshold": -0.00027066194081702905, "compression/movement_sparsity/linear_layer_sparsity": 0.7031833526234568, "compression/movement_sparsity/model_sparsity": 0.546044008455755, "compression_loss": 12.196907043457031, "distillation_loss": 0.6404677629470825, "epoch": 1.53, "learning_rate": 1.3869833729216154e-05, "loss": 12.7082, "step": 3230, "task_loss": 0.3287440538406372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04509237473552289, "compression/movement_sparsity/importance_threshold": -0.0002624689048979522, "compression/movement_sparsity/linear_layer_sparsity": 0.707251851381361, "compression/movement_sparsity/model_sparsity": 0.5492033258114275, "compression_loss": 12.238248825073242, "distillation_loss": 0.7356147766113281, "epoch": 1.54, "learning_rate": 1.385083135391924e-05, "loss": 12.8188, "step": 3240, "task_loss": 0.4595518708229065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04524244446156562, "compression/movement_sparsity/importance_threshold": -0.0002544428975053588, "compression/movement_sparsity/linear_layer_sparsity": 0.7103969883882867, "compression/movement_sparsity/model_sparsity": 0.55164562370144, "compression_loss": 12.278731346130371, "distillation_loss": 0.5563812851905823, "epoch": 1.54, "learning_rate": 1.3831828978622328e-05, "loss": 12.7541, "step": 3250, "task_loss": 0.2265515923500061 }, { "epoch": 1.54, "eval_accuracy": 0.8577981651376146, "eval_loss": 12.835886001586914, "eval_runtime": 24.5855, "eval_samples_per_second": 35.468, "eval_steps_per_second": 4.434, "step": 3250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.045389423263500435, "compression/movement_sparsity/importance_threshold": -0.0002465821984690493, "compression/movement_sparsity/linear_layer_sparsity": 0.7141112898976212, "compression/movement_sparsity/model_sparsity": 0.5545298957440069, "compression_loss": 12.318343162536621, "distillation_loss": 0.7919510006904602, "epoch": 1.55, "learning_rate": 1.3812826603325418e-05, "loss": 12.8273, "step": 3260, "task_loss": 0.3617420196533203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0455333433049497, "compression/movement_sparsity/importance_threshold": -0.0002388850876188218, "compression/movement_sparsity/linear_layer_sparsity": 0.7169433387910268, "compression/movement_sparsity/model_sparsity": 0.556729070858333, "compression_loss": 12.357135772705078, "distillation_loss": 0.6720787286758423, "epoch": 1.55, "learning_rate": 1.3793824228028505e-05, "loss": 12.7631, "step": 3270, "task_loss": 0.34426239132881165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.045674236749535746, "compression/movement_sparsity/importance_threshold": -0.00023134984478447723, "compression/movement_sparsity/linear_layer_sparsity": 0.71958969672162, "compression/movement_sparsity/model_sparsity": 0.5587840510947658, "compression_loss": 12.395111083984375, "distillation_loss": 0.5282790064811707, "epoch": 1.56, "learning_rate": 1.3774821852731593e-05, "loss": 12.8532, "step": 3280, "task_loss": 0.2643144726753235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.045812135760880945, "compression/movement_sparsity/importance_threshold": -0.0002239747497958136, "compression/movement_sparsity/linear_layer_sparsity": 0.7223523129328515, "compression/movement_sparsity/model_sparsity": 0.5609293095457484, "compression_loss": 12.432284355163574, "distillation_loss": 0.40331676602363586, "epoch": 1.56, "learning_rate": 1.375581947743468e-05, "loss": 12.9658, "step": 3290, "task_loss": 0.1825210452079773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.045947072502607635, "compression/movement_sparsity/importance_threshold": -0.0002167580824826306, "compression/movement_sparsity/linear_layer_sparsity": 0.725434474744053, "compression/movement_sparsity/model_sparsity": 0.5633227052139176, "compression_loss": 12.468629837036133, "distillation_loss": 0.4709409475326538, "epoch": 1.57, "learning_rate": 1.3736817102137769e-05, "loss": 12.9347, "step": 3300, "task_loss": 0.2354767620563507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04607907913833817, "compression/movement_sparsity/importance_threshold": -0.00020969812267472836, "compression/movement_sparsity/linear_layer_sparsity": 0.7282369768142126, "compression/movement_sparsity/model_sparsity": 0.5654989362899041, "compression_loss": 12.504148483276367, "distillation_loss": 0.44413477182388306, "epoch": 1.57, "learning_rate": 1.3717814726840857e-05, "loss": 12.9811, "step": 3310, "task_loss": 0.25892922282218933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04620818783169489, "compression/movement_sparsity/importance_threshold": -0.0002027931502019049, "compression/movement_sparsity/linear_layer_sparsity": 0.73153312961081, "compression/movement_sparsity/model_sparsity": 0.5680585026943443, "compression_loss": 12.538874626159668, "distillation_loss": 0.2548993229866028, "epoch": 1.58, "learning_rate": 1.3698812351543945e-05, "loss": 12.9981, "step": 3320, "task_loss": 0.06697467714548111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04633443074630017, "compression/movement_sparsity/importance_threshold": -0.0001960414448939603, "compression/movement_sparsity/linear_layer_sparsity": 0.7338615886592894, "compression/movement_sparsity/model_sparsity": 0.569866624441842, "compression_loss": 12.572813034057617, "distillation_loss": 1.0099992752075195, "epoch": 1.58, "learning_rate": 1.3679809976247031e-05, "loss": 13.0474, "step": 3330, "task_loss": 0.6512800455093384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04645784004577634, "compression/movement_sparsity/importance_threshold": -0.00018944128658069433, "compression/movement_sparsity/linear_layer_sparsity": 0.7365036533611864, "compression/movement_sparsity/model_sparsity": 0.571918270851048, "compression_loss": 12.606016159057617, "distillation_loss": 0.48587775230407715, "epoch": 1.59, "learning_rate": 1.3660807600950119e-05, "loss": 13.2127, "step": 3340, "task_loss": 0.2523800730705261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04657844789374576, "compression/movement_sparsity/importance_threshold": -0.0001829909550919058, "compression/movement_sparsity/linear_layer_sparsity": 0.739569489141072, "compression/movement_sparsity/model_sparsity": 0.5742989888419817, "compression_loss": 12.638442993164062, "distillation_loss": 0.8291702270507812, "epoch": 1.59, "learning_rate": 1.3641805225653208e-05, "loss": 13.1407, "step": 3350, "task_loss": 0.37619519233703613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04669628645383078, "compression/movement_sparsity/importance_threshold": -0.00017668873025739355, "compression/movement_sparsity/linear_layer_sparsity": 0.7418803871198434, "compression/movement_sparsity/model_sparsity": 0.5760934738660553, "compression_loss": 12.670111656188965, "distillation_loss": 0.5576849579811096, "epoch": 1.6, "learning_rate": 1.3622802850356296e-05, "loss": 13.1378, "step": 3360, "task_loss": 0.2429836392402649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04681138788965375, "compression/movement_sparsity/importance_threshold": -0.00017053289190695773, "compression/movement_sparsity/linear_layer_sparsity": 0.7444494081225534, "compression/movement_sparsity/model_sparsity": 0.5780883995435379, "compression_loss": 12.701035499572754, "distillation_loss": 0.9135178327560425, "epoch": 1.6, "learning_rate": 1.3603800475059384e-05, "loss": 13.1637, "step": 3370, "task_loss": 0.5078893899917603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04692378436483701, "compression/movement_sparsity/importance_threshold": -0.0001645217198703976, "compression/movement_sparsity/linear_layer_sparsity": 0.7467615764265282, "compression/movement_sparsity/model_sparsity": 0.5798838710151197, "compression_loss": 12.731249809265137, "distillation_loss": 0.5046910047531128, "epoch": 1.61, "learning_rate": 1.358479809976247e-05, "loss": 13.1697, "step": 3380, "task_loss": 0.35627833008766174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04703350804300293, "compression/movement_sparsity/importance_threshold": -0.00015865349397751203, "compression/movement_sparsity/linear_layer_sparsity": 0.7494276361600422, "compression/movement_sparsity/model_sparsity": 0.5819541503217034, "compression_loss": 12.760735511779785, "distillation_loss": 0.8998797535896301, "epoch": 1.61, "learning_rate": 1.356579572446556e-05, "loss": 13.1898, "step": 3390, "task_loss": 0.5463681221008301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.047140591087773846, "compression/movement_sparsity/importance_threshold": -0.0001529264940581007, "compression/movement_sparsity/linear_layer_sparsity": 0.7521114686841313, "compression/movement_sparsity/model_sparsity": 0.584038230759629, "compression_loss": 12.789498329162598, "distillation_loss": 0.8819484114646912, "epoch": 1.62, "learning_rate": 1.3546793349168648e-05, "loss": 13.3173, "step": 3400, "task_loss": 0.5092288851737976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04724506566277212, "compression/movement_sparsity/importance_threshold": -0.0001473389999419629, "compression/movement_sparsity/linear_layer_sparsity": 0.7540012185712135, "compression/movement_sparsity/model_sparsity": 0.5855056810334038, "compression_loss": 12.8175630569458, "distillation_loss": 0.6930016875267029, "epoch": 1.62, "learning_rate": 1.3527790973871735e-05, "loss": 13.311, "step": 3410, "task_loss": 0.25591135025024414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.047346963931620085, "compression/movement_sparsity/importance_threshold": -0.00014188929145889833, "compression/movement_sparsity/linear_layer_sparsity": 0.7560711429162903, "compression/movement_sparsity/model_sparsity": 0.5871130424454297, "compression_loss": 12.844893455505371, "distillation_loss": 0.5201950073242188, "epoch": 1.62, "learning_rate": 1.3508788598574822e-05, "loss": 13.2855, "step": 3420, "task_loss": 0.2590838670730591 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04744631805794011, "compression/movement_sparsity/importance_threshold": -0.0001365756484387054, "compression/movement_sparsity/linear_layer_sparsity": 0.7579267822192111, "compression/movement_sparsity/model_sparsity": 0.5885540047768538, "compression_loss": 12.871529579162598, "distillation_loss": 0.5567278861999512, "epoch": 1.63, "learning_rate": 1.348978622327791e-05, "loss": 13.2248, "step": 3430, "task_loss": 0.2669626772403717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04754316020535455, "compression/movement_sparsity/importance_threshold": -0.00013139635071118426, "compression/movement_sparsity/linear_layer_sparsity": 0.7606231180367359, "compression/movement_sparsity/model_sparsity": 0.590647794415717, "compression_loss": 12.897520065307617, "distillation_loss": 0.45216840505599976, "epoch": 1.63, "learning_rate": 1.3470783847980999e-05, "loss": 13.4032, "step": 3440, "task_loss": 0.242957204580307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04763752253748573, "compression/movement_sparsity/importance_threshold": -0.00012634967810613417, "compression/movement_sparsity/linear_layer_sparsity": 0.7631808825466727, "compression/movement_sparsity/model_sparsity": 0.5926339790722238, "compression_loss": 12.922788619995117, "distillation_loss": 0.6922011375427246, "epoch": 1.64, "learning_rate": 1.3451781472684087e-05, "loss": 13.3809, "step": 3450, "task_loss": 0.4704532027244568 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04772943721795603, "compression/movement_sparsity/importance_threshold": -0.00012143391045335399, "compression/movement_sparsity/linear_layer_sparsity": 0.7656033927092744, "compression/movement_sparsity/model_sparsity": 0.5945151344704236, "compression_loss": 12.947429656982422, "distillation_loss": 0.35389190912246704, "epoch": 1.64, "learning_rate": 1.3432779097387175e-05, "loss": 13.5379, "step": 3460, "task_loss": 0.2909751236438751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04781893641038778, "compression/movement_sparsity/importance_threshold": -0.00011664732758264297, "compression/movement_sparsity/linear_layer_sparsity": 0.7674263564250225, "compression/movement_sparsity/model_sparsity": 0.5959307231798303, "compression_loss": 12.971384048461914, "distillation_loss": 0.40384596586227417, "epoch": 1.65, "learning_rate": 1.3413776722090261e-05, "loss": 13.4134, "step": 3470, "task_loss": 0.23214919865131378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04790605227840333, "compression/movement_sparsity/importance_threshold": -0.00011198820932380171, "compression/movement_sparsity/linear_layer_sparsity": 0.7692812899917194, "compression/movement_sparsity/model_sparsity": 0.597371137484861, "compression_loss": 12.994702339172363, "distillation_loss": 0.8970961570739746, "epoch": 1.65, "learning_rate": 1.339477434679335e-05, "loss": 13.5349, "step": 3480, "task_loss": 0.45820656418800354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.047990816985625045, "compression/movement_sparsity/importance_threshold": -0.00010745483550662818, "compression/movement_sparsity/linear_layer_sparsity": 0.7712226997704005, "compression/movement_sparsity/model_sparsity": 0.5988787032906373, "compression_loss": 13.017374992370605, "distillation_loss": 0.43547579646110535, "epoch": 1.66, "learning_rate": 1.3375771971496438e-05, "loss": 13.4468, "step": 3490, "task_loss": 0.22628280520439148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04807326269567527, "compression/movement_sparsity/importance_threshold": -0.00010304548596092207, "compression/movement_sparsity/linear_layer_sparsity": 0.773500263474857, "compression/movement_sparsity/model_sparsity": 0.600647303201393, "compression_loss": 13.039436340332031, "distillation_loss": 0.4127658009529114, "epoch": 1.66, "learning_rate": 1.3356769596199526e-05, "loss": 13.6184, "step": 3500, "task_loss": 0.13857224583625793 }, { "epoch": 1.66, "eval_accuracy": 0.8428899082568807, "eval_loss": 13.651920318603516, "eval_runtime": 32.6189, "eval_samples_per_second": 26.733, "eval_steps_per_second": 3.342, "step": 3500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.048153421572176364, "compression/movement_sparsity/importance_threshold": -9.87584405164831e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7749152881285757, "compression/movement_sparsity/model_sparsity": 0.601746114387832, "compression_loss": 13.060900688171387, "distillation_loss": 0.3880394995212555, "epoch": 1.67, "learning_rate": 1.3337767220902612e-05, "loss": 13.5323, "step": 3510, "task_loss": 0.09369392693042755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04823132577875065, "compression/movement_sparsity/importance_threshold": -9.459197900311099e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7763604947681422, "compression/movement_sparsity/model_sparsity": 0.6028683628363647, "compression_loss": 13.081767082214355, "distillation_loss": 0.6340326070785522, "epoch": 1.67, "learning_rate": 1.3318764845605704e-05, "loss": 13.5637, "step": 3520, "task_loss": 0.42461642622947693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04830700747902051, "compression/movement_sparsity/importance_threshold": -9.05443812506037e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7780264674608551, "compression/movement_sparsity/model_sparsity": 0.6041620430745457, "compression_loss": 13.102035522460938, "distillation_loss": 0.25822052359580994, "epoch": 1.68, "learning_rate": 1.329976247030879e-05, "loss": 13.6241, "step": 3530, "task_loss": 0.06908401101827621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04838049883660828, "compression/movement_sparsity/importance_threshold": -8.66139270887618e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7795130772922313, "compression/movement_sparsity/model_sparsity": 0.6053164424048282, "compression_loss": 13.121685981750488, "distillation_loss": 0.9944248795509338, "epoch": 1.68, "learning_rate": 1.3280760095011878e-05, "loss": 13.6799, "step": 3540, "task_loss": 0.43491220474243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04845183201513631, "compression/movement_sparsity/importance_threshold": -8.279889634738372e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7810252935862692, "compression/movement_sparsity/model_sparsity": 0.6064907259594202, "compression_loss": 13.140732765197754, "distillation_loss": 0.4648146629333496, "epoch": 1.69, "learning_rate": 1.3261757719714966e-05, "loss": 13.6035, "step": 3550, "task_loss": 0.22887027263641357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.048521039178226956, "compression/movement_sparsity/importance_threshold": -7.909756885626958e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7832119702273411, "compression/movement_sparsity/model_sparsity": 0.6081887492044712, "compression_loss": 13.159188270568848, "distillation_loss": 0.7744244933128357, "epoch": 1.69, "learning_rate": 1.3242755344418052e-05, "loss": 13.6462, "step": 3560, "task_loss": 0.3696683943271637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04858815248950257, "compression/movement_sparsity/importance_threshold": -7.550822444521823e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7850979091388136, "compression/movement_sparsity/model_sparsity": 0.6096532401357213, "compression_loss": 13.177058219909668, "distillation_loss": 0.3203733563423157, "epoch": 1.7, "learning_rate": 1.3223752969121141e-05, "loss": 13.6384, "step": 3570, "task_loss": 0.19227594137191772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.048653204112585495, "compression/movement_sparsity/importance_threshold": -7.202914294402937e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7865801081187895, "compression/movement_sparsity/model_sparsity": 0.6108042143010447, "compression_loss": 13.194385528564453, "distillation_loss": 0.5051361918449402, "epoch": 1.7, "learning_rate": 1.3204750593824229e-05, "loss": 13.6339, "step": 3580, "task_loss": 0.25856292247772217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.048716226211098085, "compression/movement_sparsity/importance_threshold": -6.865860418250229e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.78752878227567, "compression/movement_sparsity/model_sparsity": 0.6115408896466835, "compression_loss": 13.211155891418457, "distillation_loss": 0.5065594911575317, "epoch": 1.71, "learning_rate": 1.3185748218527317e-05, "loss": 13.7948, "step": 3590, "task_loss": 0.31428200006484985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0487772509486627, "compression/movement_sparsity/importance_threshold": -6.539488799043583e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7893345260275519, "compression/movement_sparsity/model_sparsity": 0.6129431065120898, "compression_loss": 13.227370262145996, "distillation_loss": 0.44985029101371765, "epoch": 1.71, "learning_rate": 1.3166745843230405e-05, "loss": 13.7212, "step": 3600, "task_loss": 0.16183635592460632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04883631048890167, "compression/movement_sparsity/importance_threshold": -6.223627419762968e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7903761574074074, "compression/movement_sparsity/model_sparsity": 0.613751966067521, "compression_loss": 13.2430419921875, "distillation_loss": 0.8156948089599609, "epoch": 1.71, "learning_rate": 1.3147743467933494e-05, "loss": 13.6668, "step": 3610, "task_loss": 0.43693339824676514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04889343699543736, "compression/movement_sparsity/importance_threshold": -5.918104263388357e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7916715480088828, "compression/movement_sparsity/model_sparsity": 0.6147578776464948, "compression_loss": 13.258223533630371, "distillation_loss": 0.9145314693450928, "epoch": 1.72, "learning_rate": 1.312874109263658e-05, "loss": 13.7678, "step": 3620, "task_loss": 0.4340613782405853 }, { "compression/movement_sparsity/importance_regularization_factor": 0.048948662631892126, "compression/movement_sparsity/importance_threshold": -5.622747312899589e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7927108857460102, "compression/movement_sparsity/model_sparsity": 0.6155649561161471, "compression_loss": 13.272916793823242, "distillation_loss": 0.8170522451400757, "epoch": 1.72, "learning_rate": 1.3109738717339668e-05, "loss": 13.7102, "step": 3630, "task_loss": 0.38500523567199707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049002019561888314, "compression/movement_sparsity/importance_threshold": -5.3373845512766794e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7941787935674496, "compression/movement_sparsity/model_sparsity": 0.6167048327470029, "compression_loss": 13.287137031555176, "distillation_loss": 0.32002827525138855, "epoch": 1.73, "learning_rate": 1.3090736342042756e-05, "loss": 13.8523, "step": 3640, "task_loss": 0.15141043066978455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049053539949048264, "compression/movement_sparsity/importance_threshold": -5.061843961499555e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.795318228225685, "compression/movement_sparsity/model_sparsity": 0.6175896396267948, "compression_loss": 13.300834655761719, "distillation_loss": 0.7697837352752686, "epoch": 1.73, "learning_rate": 1.3071733966745846e-05, "loss": 13.8336, "step": 3650, "task_loss": 0.4357471466064453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04910325595699434, "compression/movement_sparsity/importance_threshold": -4.795953526548144e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7962184535907859, "compression/movement_sparsity/model_sparsity": 0.6182886929605225, "compression_loss": 13.313998222351074, "distillation_loss": 0.6824323534965515, "epoch": 1.74, "learning_rate": 1.3052731591448932e-05, "loss": 13.8227, "step": 3660, "task_loss": 0.34603098034858704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049151199749348895, "compression/movement_sparsity/importance_threshold": -4.53954122940233e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7974448937631737, "compression/movement_sparsity/model_sparsity": 0.619241062360855, "compression_loss": 13.326610565185547, "distillation_loss": 0.08792783319950104, "epoch": 1.74, "learning_rate": 1.303372921615202e-05, "loss": 13.6874, "step": 3670, "task_loss": 0.2505362629890442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04919740348973427, "compression/movement_sparsity/importance_threshold": -4.2924350530420836e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7981858697493225, "compression/movement_sparsity/model_sparsity": 0.6198164535388978, "compression_loss": 13.338798522949219, "distillation_loss": 1.1169078350067139, "epoch": 1.75, "learning_rate": 1.3014726840855108e-05, "loss": 13.9032, "step": 3680, "task_loss": 0.44423192739486694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04924189934177281, "compression/movement_sparsity/importance_threshold": -4.054462980447376e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.7994761437631738, "compression/movement_sparsity/model_sparsity": 0.620818391926519, "compression_loss": 13.350573539733887, "distillation_loss": 0.7253522872924805, "epoch": 1.75, "learning_rate": 1.2995724465558196e-05, "loss": 13.7283, "step": 3690, "task_loss": 0.4123845100402832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049284719469086885, "compression/movement_sparsity/importance_threshold": -3.8254529945980914e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8006924801452876, "compression/movement_sparsity/model_sparsity": 0.6217629154156518, "compression_loss": 13.361876487731934, "distillation_loss": 0.7416278719902039, "epoch": 1.76, "learning_rate": 1.2976722090261285e-05, "loss": 13.7298, "step": 3700, "task_loss": 0.4112977981567383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04932589603529883, "compression/movement_sparsity/importance_threshold": -3.605233078474157e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.801716456357272, "compression/movement_sparsity/model_sparsity": 0.6225580651774731, "compression_loss": 13.372750282287598, "distillation_loss": 0.42862361669540405, "epoch": 1.76, "learning_rate": 1.2957719714964371e-05, "loss": 13.8126, "step": 3710, "task_loss": 0.1545902043581009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049365461204031, "compression/movement_sparsity/importance_threshold": -3.393631215055587e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8031718373607347, "compression/movement_sparsity/model_sparsity": 0.6236882143398449, "compression_loss": 13.383188247680664, "distillation_loss": 0.1796613186597824, "epoch": 1.77, "learning_rate": 1.293871733966746e-05, "loss": 13.9012, "step": 3720, "task_loss": 0.04331651329994202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04940344713890575, "compression/movement_sparsity/importance_threshold": -3.190475387322223e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8042020240514905, "compression/movement_sparsity/model_sparsity": 0.6244881867339287, "compression_loss": 13.39314079284668, "distillation_loss": 0.6083955764770508, "epoch": 1.77, "learning_rate": 1.2919714964370547e-05, "loss": 13.7336, "step": 3730, "task_loss": 0.5452806353569031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04943988600354543, "compression/movement_sparsity/importance_threshold": -2.995593578253991e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.805487898976212, "compression/movement_sparsity/model_sparsity": 0.6254867090903641, "compression_loss": 13.402615547180176, "distillation_loss": 0.35754120349884033, "epoch": 1.78, "learning_rate": 1.2900712589073637e-05, "loss": 13.7667, "step": 3740, "task_loss": 0.12456899136304855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049474809961572386, "compression/movement_sparsity/importance_threshold": -2.8088137708309063e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8067680691997893, "compression/movement_sparsity/model_sparsity": 0.6264808015667855, "compression_loss": 13.411738395690918, "distillation_loss": 0.689171552658081, "epoch": 1.78, "learning_rate": 1.2881710213776723e-05, "loss": 13.9171, "step": 3750, "task_loss": 0.5145508646965027 }, { "epoch": 1.78, "eval_accuracy": 0.8474770642201835, "eval_loss": 14.073362350463867, "eval_runtime": 23.1584, "eval_samples_per_second": 37.654, "eval_steps_per_second": 4.707, "step": 3750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04950825117660897, "compression/movement_sparsity/importance_threshold": -2.629963948032896e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8076983001166818, "compression/movement_sparsity/model_sparsity": 0.6272031551560086, "compression_loss": 13.420463562011719, "distillation_loss": 0.3995903730392456, "epoch": 1.79, "learning_rate": 1.286270783847981e-05, "loss": 13.7818, "step": 3760, "task_loss": 0.16869285702705383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049540241812277536, "compression/movement_sparsity/importance_threshold": -2.458872092839801e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8085055094474556, "compression/movement_sparsity/model_sparsity": 0.6278299786110779, "compression_loss": 13.42878532409668, "distillation_loss": 0.34937620162963867, "epoch": 1.79, "learning_rate": 1.2843705463182899e-05, "loss": 13.9115, "step": 3770, "task_loss": 0.16115997731685638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04957081403220043, "compression/movement_sparsity/importance_threshold": -2.2953661882316786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.809237898976212, "compression/movement_sparsity/model_sparsity": 0.628398702134667, "compression_loss": 13.43675708770752, "distillation_loss": 0.7227451801300049, "epoch": 1.8, "learning_rate": 1.2824703087885986e-05, "loss": 13.9422, "step": 3780, "task_loss": 0.4984316825866699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049600000000000005, "compression/movement_sparsity/importance_threshold": -2.1392742171883698e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8102044517841012, "compression/movement_sparsity/model_sparsity": 0.6291492608156068, "compression_loss": 13.444360733032227, "distillation_loss": 0.44791698455810547, "epoch": 1.8, "learning_rate": 1.2805700712589076e-05, "loss": 13.9218, "step": 3790, "task_loss": 0.32737138867378235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04962783187929862, "compression/movement_sparsity/importance_threshold": -1.990424162689802e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8111833902627221, "compression/movement_sparsity/model_sparsity": 0.629909437359752, "compression_loss": 13.45160961151123, "distillation_loss": 0.26000645756721497, "epoch": 1.81, "learning_rate": 1.2786698337292162e-05, "loss": 13.9993, "step": 3800, "task_loss": 0.18172568082809448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049654341833718606, "compression/movement_sparsity/importance_threshold": -1.8486440077159893e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8120328261442337, "compression/movement_sparsity/model_sparsity": 0.6305690510606968, "compression_loss": 13.458487510681152, "distillation_loss": 0.5715627670288086, "epoch": 1.81, "learning_rate": 1.276769596199525e-05, "loss": 13.9063, "step": 3810, "task_loss": 0.3135397434234619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04967956202688233, "compression/movement_sparsity/importance_threshold": -1.713761735246816e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8128433641975309, "compression/movement_sparsity/model_sparsity": 0.6311984593735886, "compression_loss": 13.46501636505127, "distillation_loss": 0.36794915795326233, "epoch": 1.81, "learning_rate": 1.2748693586698338e-05, "loss": 13.9184, "step": 3820, "task_loss": 0.3162195384502411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04970352462241214, "compression/movement_sparsity/importance_threshold": -1.5856053282622528e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8135937852868113, "compression/movement_sparsity/model_sparsity": 0.6317811849715307, "compression_loss": 13.471232414245605, "distillation_loss": 0.7687112092971802, "epoch": 1.82, "learning_rate": 1.2729691211401427e-05, "loss": 13.9925, "step": 3830, "task_loss": 0.4176146686077118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04972626178393039, "compression/movement_sparsity/importance_threshold": -1.4640027697421405e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8145009386291779, "compression/movement_sparsity/model_sparsity": 0.6324856180976874, "compression_loss": 13.477070808410645, "distillation_loss": 0.18658028542995453, "epoch": 1.82, "learning_rate": 1.2710688836104515e-05, "loss": 13.8647, "step": 3840, "task_loss": 0.1433614045381546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049747805675059424, "compression/movement_sparsity/importance_threshold": -1.3487820426664934e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8151889255871725, "compression/movement_sparsity/model_sparsity": 0.6330198616273545, "compression_loss": 13.482598304748535, "distillation_loss": 0.5786043405532837, "epoch": 1.83, "learning_rate": 1.2691686460807601e-05, "loss": 13.9974, "step": 3850, "task_loss": 0.24868422746658325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049768188459421596, "compression/movement_sparsity/importance_threshold": -1.2397711300152388e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8155505330660946, "compression/movement_sparsity/model_sparsity": 0.6333006612175915, "compression_loss": 13.487833023071289, "distillation_loss": 0.2891767621040344, "epoch": 1.83, "learning_rate": 1.267268408551069e-05, "loss": 13.8999, "step": 3860, "task_loss": 0.17670656740665436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04978744230063925, "compression/movement_sparsity/importance_threshold": -1.1367980147683476e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8161176156466425, "compression/movement_sparsity/model_sparsity": 0.6337410186922852, "compression_loss": 13.4927339553833, "distillation_loss": 0.9970003366470337, "epoch": 1.84, "learning_rate": 1.2653681710213779e-05, "loss": 14.0161, "step": 3870, "task_loss": 0.44373542070388794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04980559936233475, "compression/movement_sparsity/importance_threshold": -1.0396906799056173e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.8173062989310449, "compression/movement_sparsity/model_sparsity": 0.6346640686805675, "compression_loss": 13.497337341308594, "distillation_loss": 0.15605174005031586, "epoch": 1.84, "learning_rate": 1.2634679334916867e-05, "loss": 13.7718, "step": 3880, "task_loss": 0.025827720761299133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04982269180813043, "compression/movement_sparsity/importance_threshold": -9.482771084071487e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8178974588791027, "compression/movement_sparsity/model_sparsity": 0.6351231229890514, "compression_loss": 13.501633644104004, "distillation_loss": 0.5499266982078552, "epoch": 1.85, "learning_rate": 1.2615676959619953e-05, "loss": 13.9453, "step": 3890, "task_loss": 0.355935275554657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04983875180164866, "compression/movement_sparsity/importance_threshold": -8.623852832527392e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8187233819820837, "compression/movement_sparsity/model_sparsity": 0.6357644782773206, "compression_loss": 13.50567626953125, "distillation_loss": 0.44997888803482056, "epoch": 1.85, "learning_rate": 1.259667458432304e-05, "loss": 13.9764, "step": 3900, "task_loss": 0.2622376084327698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049853811506511775, "compression/movement_sparsity/importance_threshold": -7.81843187422403e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8195478700880758, "compression/movement_sparsity/model_sparsity": 0.6364047192452565, "compression_loss": 13.509462356567383, "distillation_loss": 0.7038769721984863, "epoch": 1.86, "learning_rate": 1.2577672209026129e-05, "loss": 14.057, "step": 3910, "task_loss": 0.33292850852012634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04986790308634213, "compression/movement_sparsity/importance_threshold": -7.064788038961111e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8199815449977417, "compression/movement_sparsity/model_sparsity": 0.6367414814640393, "compression_loss": 13.513002395629883, "distillation_loss": 0.3087387681007385, "epoch": 1.86, "learning_rate": 1.2558669833729218e-05, "loss": 13.9792, "step": 3920, "task_loss": 0.10697836428880692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04988105870476209, "compression/movement_sparsity/importance_threshold": -6.361201156536607e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8203553617133393, "compression/movement_sparsity/model_sparsity": 0.6370317619108833, "compression_loss": 13.516273498535156, "distillation_loss": 0.9965323805809021, "epoch": 1.87, "learning_rate": 1.2539667458432306e-05, "loss": 13.9582, "step": 3930, "task_loss": 0.5111120343208313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049893310525393975, "compression/movement_sparsity/importance_threshold": -5.705951056751094e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8209573899992472, "compression/movement_sparsity/model_sparsity": 0.6374992558258266, "compression_loss": 13.519289016723633, "distillation_loss": 0.6441047787666321, "epoch": 1.87, "learning_rate": 1.2520665083135392e-05, "loss": 13.9644, "step": 3940, "task_loss": 0.35741135478019714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04990469071186017, "compression/movement_sparsity/importance_threshold": -5.09731756940298e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8216001980766335, "compression/movement_sparsity/model_sparsity": 0.6379984165325387, "compression_loss": 13.522068977355957, "distillation_loss": 0.3542700409889221, "epoch": 1.88, "learning_rate": 1.250166270783848e-05, "loss": 13.9309, "step": 3950, "task_loss": 0.16567806899547577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049915231427783, "compression/movement_sparsity/importance_threshold": -4.533580524292407e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8219789667080699, "compression/movement_sparsity/model_sparsity": 0.6382925422979101, "compression_loss": 13.524628639221191, "distillation_loss": 0.6030027866363525, "epoch": 1.88, "learning_rate": 1.248266033254157e-05, "loss": 14.0541, "step": 3960, "task_loss": 0.4497295618057251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04992496483678483, "compression/movement_sparsity/importance_threshold": -4.013019751218216e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8225840296597411, "compression/movement_sparsity/model_sparsity": 0.6387623927263453, "compression_loss": 13.52697467803955, "distillation_loss": 0.28092095255851746, "epoch": 1.89, "learning_rate": 1.2463657957244657e-05, "loss": 13.9522, "step": 3970, "task_loss": 0.09975674748420715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04993392310248801, "compression/movement_sparsity/importance_threshold": -3.533915079980115e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.823128846262421, "compression/movement_sparsity/model_sparsity": 0.6391854599683252, "compression_loss": 13.529121398925781, "distillation_loss": 0.1360899955034256, "epoch": 1.89, "learning_rate": 1.2444655581947744e-05, "loss": 14.0078, "step": 3980, "task_loss": 0.031911686062812805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04994213838851488, "compression/movement_sparsity/importance_threshold": -3.094546340377379e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8237104552469136, "compression/movement_sparsity/model_sparsity": 0.639637097652951, "compression_loss": 13.53105640411377, "distillation_loss": 0.8466547727584839, "epoch": 1.9, "learning_rate": 1.2425653206650832e-05, "loss": 14.0804, "step": 3990, "task_loss": 0.4057835340499878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0499496428584878, "compression/movement_sparsity/importance_threshold": -2.6931933622088497e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8239907736750979, "compression/movement_sparsity/model_sparsity": 0.6398547737364343, "compression_loss": 13.532774925231934, "distillation_loss": 0.13539861142635345, "epoch": 1.9, "learning_rate": 1.2406650831353921e-05, "loss": 13.9601, "step": 4000, "task_loss": 0.03517580032348633 }, { "epoch": 1.9, "eval_accuracy": 0.8577981651376146, "eval_loss": 14.102431297302246, "eval_runtime": 23.3122, "eval_samples_per_second": 37.405, "eval_steps_per_second": 4.676, "step": 4000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04995646867602912, "compression/movement_sparsity/importance_threshold": -2.3281359752746686e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8247015323885878, "compression/movement_sparsity/model_sparsity": 0.6404067002510637, "compression_loss": 13.5343017578125, "distillation_loss": 0.37791934609413147, "epoch": 1.9, "learning_rate": 1.2387648456057009e-05, "loss": 14.0972, "step": 4010, "task_loss": 0.31983357667922974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04996264800476118, "compression/movement_sparsity/importance_threshold": -1.997654009373677e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8250938629177959, "compression/movement_sparsity/model_sparsity": 0.6407113572569627, "compression_loss": 13.535682678222656, "distillation_loss": 0.20223000645637512, "epoch": 1.91, "learning_rate": 1.2368646080760097e-05, "loss": 13.8255, "step": 4020, "task_loss": 0.0476217158138752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04996821300830635, "compression/movement_sparsity/importance_threshold": -1.7000272943051495e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8256924801452875, "compression/movement_sparsity/model_sparsity": 0.6411762023776709, "compression_loss": 13.536922454833984, "distillation_loss": 0.5841785669326782, "epoch": 1.91, "learning_rate": 1.2349643705463183e-05, "loss": 14.0384, "step": 4030, "task_loss": 0.35522913932800293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04997319585028697, "compression/movement_sparsity/importance_threshold": -1.4335356598687947e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8266022917607648, "compression/movement_sparsity/model_sparsity": 0.6418826997365765, "compression_loss": 13.537969589233398, "distillation_loss": 0.47049567103385925, "epoch": 1.92, "learning_rate": 1.233064133016627e-05, "loss": 13.9148, "step": 4040, "task_loss": 0.39884519577026367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04997762869432539, "compression/movement_sparsity/importance_threshold": -1.1964589358634536e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.8273503133468835, "compression/movement_sparsity/model_sparsity": 0.6424635620447807, "compression_loss": 13.538784980773926, "distillation_loss": 0.41415804624557495, "epoch": 1.92, "learning_rate": 1.231163895486936e-05, "loss": 13.8998, "step": 4050, "task_loss": 0.3223814368247986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049981543704043965, "compression/movement_sparsity/importance_threshold": -9.870769520888348e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8279402735433604, "compression/movement_sparsity/model_sparsity": 0.6429216847083958, "compression_loss": 13.53950023651123, "distillation_loss": 0.19833412766456604, "epoch": 1.93, "learning_rate": 1.2292636579572448e-05, "loss": 14.0509, "step": 4060, "task_loss": 0.10071046650409698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04998497304306504, "compression/movement_sparsity/importance_threshold": -8.036695383442129e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8284314541930141, "compression/movement_sparsity/model_sparsity": 0.6433031019444725, "compression_loss": 13.540120124816895, "distillation_loss": 0.48713254928588867, "epoch": 1.93, "learning_rate": 1.2273634204275534e-05, "loss": 14.023, "step": 4070, "task_loss": 0.2855875492095947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04998794887501098, "compression/movement_sparsity/importance_threshold": -6.44516524428429e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8293861388700693, "compression/movement_sparsity/model_sparsity": 0.6440444446482291, "compression_loss": 13.540621757507324, "distillation_loss": 0.48493221402168274, "epoch": 1.94, "learning_rate": 1.2254631828978622e-05, "loss": 14.0355, "step": 4080, "task_loss": 0.24878904223442078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999050336350411, "compression/movement_sparsity/importance_threshold": -5.078977401416253e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8297654603282144, "compression/movement_sparsity/model_sparsity": 0.644338999700942, "compression_loss": 13.540968894958496, "distillation_loss": 0.4940585196018219, "epoch": 1.94, "learning_rate": 1.2235629453681712e-05, "loss": 13.8999, "step": 4090, "task_loss": 0.3463954031467438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0499926686721668, "compression/movement_sparsity/importance_threshold": -3.920930152830765e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8303199808039747, "compression/movement_sparsity/model_sparsity": 0.644769602305832, "compression_loss": 13.541226387023926, "distillation_loss": 0.3753039836883545, "epoch": 1.95, "learning_rate": 1.22166270783848e-05, "loss": 13.9771, "step": 4100, "task_loss": 0.5592837333679199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0499944769646214, "compression/movement_sparsity/importance_threshold": -2.953821796516237e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8311270254629629, "compression/movement_sparsity/model_sparsity": 0.6453962978880762, "compression_loss": 13.541411399841309, "distillation_loss": 0.4341853857040405, "epoch": 1.95, "learning_rate": 1.2197624703087888e-05, "loss": 14.01, "step": 4110, "task_loss": 0.2506285309791565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999596040449025, "compression/movement_sparsity/importance_threshold": -2.160450630469754e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8317775143029208, "compression/movement_sparsity/model_sparsity": 0.6459014229487039, "compression_loss": 13.541484832763672, "distillation_loss": 0.3095904588699341, "epoch": 1.96, "learning_rate": 1.2178622327790974e-05, "loss": 13.9163, "step": 4120, "task_loss": 0.12216134369373322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049997151155395714, "compression/movement_sparsity/importance_threshold": -1.5236149526797263e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8322562739950317, "compression/movement_sparsity/model_sparsity": 0.6462731949202557, "compression_loss": 13.541465759277344, "distillation_loss": 0.4424591064453125, "epoch": 1.96, "learning_rate": 1.2159619952494062e-05, "loss": 13.915, "step": 4130, "task_loss": 0.1971740871667862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999808138096014, "compression/movement_sparsity/importance_threshold": -1.0261130611475752e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.8326778572907256, "compression/movement_sparsity/model_sparsity": 0.6466005676201638, "compression_loss": 13.541352272033691, "distillation_loss": 0.8569298982620239, "epoch": 1.97, "learning_rate": 1.2140617577197151e-05, "loss": 13.9558, "step": 4140, "task_loss": 0.45095348358154297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999878324480587, "compression/movement_sparsity/importance_threshold": -6.507432538617117e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.8331807531616983, "compression/movement_sparsity/model_sparsity": 0.6469910820943721, "compression_loss": 13.541196823120117, "distillation_loss": 0.5633354187011719, "epoch": 1.97, "learning_rate": 1.2121615201900239e-05, "loss": 13.8935, "step": 4150, "task_loss": 0.26475971937179565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999928891055526, "compression/movement_sparsity/importance_threshold": -3.803038288148833e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.8333070799457994, "compression/movement_sparsity/model_sparsity": 0.6470891788188018, "compression_loss": 13.540977478027344, "distillation_loss": 0.5583696365356445, "epoch": 1.98, "learning_rate": 1.2102612826603327e-05, "loss": 13.9072, "step": 4160, "task_loss": 0.3018655776977539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999963054183066, "compression/movement_sparsity/importance_threshold": -1.975930839998377e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.834074744523487, "compression/movement_sparsity/model_sparsity": 0.6476852945282907, "compression_loss": 13.540740013122559, "distillation_loss": 0.480516254901886, "epoch": 1.98, "learning_rate": 1.2083610451306413e-05, "loss": 14.0023, "step": 4170, "task_loss": 0.21734555065631866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999984030225443, "compression/movement_sparsity/importance_threshold": -8.540931741365942e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.8346077047576032, "compression/movement_sparsity/model_sparsity": 0.6480991549268605, "compression_loss": 13.540478706359863, "distillation_loss": 0.5318132638931274, "epoch": 1.99, "learning_rate": 1.2064608076009503e-05, "loss": 13.8838, "step": 4180, "task_loss": 0.33174723386764526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04999995035544891, "compression/movement_sparsity/importance_threshold": -2.655082704475925e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.8350744198848239, "compression/movement_sparsity/model_sparsity": 0.6484615739146306, "compression_loss": 13.54019546508789, "distillation_loss": 0.5515092611312866, "epoch": 1.99, "learning_rate": 1.204560570071259e-05, "loss": 13.9953, "step": 4190, "task_loss": 0.2831692099571228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049999992865036445, "compression/movement_sparsity/importance_threshold": -3.815910894558461e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.8356860344211081, "compression/movement_sparsity/model_sparsity": 0.6489365118547517, "compression_loss": 13.539885520935059, "distillation_loss": 0.5041601061820984, "epoch": 2.0, "learning_rate": 1.2026603325415678e-05, "loss": 13.9405, "step": 4200, "task_loss": 0.24462735652923584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.049999999994639395, "compression/movement_sparsity/importance_threshold": -2.8669514678947294e-13, "compression/movement_sparsity/linear_layer_sparsity": 0.836323984680819, "compression/movement_sparsity/model_sparsity": 0.6494319003131221, "compression_loss": 13.539533615112305, "distillation_loss": 0.37183666229248047, "epoch": 2.0, "learning_rate": 1.2007600950118764e-05, "loss": 13.9897, "step": 4210, "task_loss": 0.1697111278772354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.6137279272079468, "epoch": 2.0, "learning_rate": 1.1988598574821854e-05, "loss": 0.7213, "step": 4220, "task_loss": 0.35716748237609863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.24926382303237915, "epoch": 2.01, "learning_rate": 1.1969596199524942e-05, "loss": 0.2347, "step": 4230, "task_loss": 0.09346465766429901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11653564870357513, "epoch": 2.01, "learning_rate": 1.195059382422803e-05, "loss": 0.1942, "step": 4240, "task_loss": 0.07350118458271027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4009395241737366, "epoch": 2.02, "learning_rate": 1.1931591448931118e-05, "loss": 0.2701, "step": 4250, "task_loss": 0.47706255316734314 }, { "epoch": 2.02, "eval_accuracy": 0.9048165137614679, "eval_loss": 0.33537691831588745, "eval_runtime": 22.0839, "eval_samples_per_second": 39.486, "eval_steps_per_second": 4.936, "step": 4250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.260000079870224, "epoch": 2.02, "learning_rate": 1.1912589073634204e-05, "loss": 0.2824, "step": 4260, "task_loss": 0.33928802609443665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.5958622097969055, "epoch": 2.03, "learning_rate": 1.1893586698337293e-05, "loss": 0.2131, "step": 4270, "task_loss": 0.2858772873878479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.28239572048187256, "epoch": 2.03, "learning_rate": 1.1874584323040381e-05, "loss": 0.3669, "step": 4280, "task_loss": 0.3893040120601654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09593847393989563, "epoch": 2.04, "learning_rate": 1.1855581947743469e-05, "loss": 0.2448, "step": 4290, "task_loss": 0.20446573197841644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017060134559869766, "epoch": 2.04, "learning_rate": 1.1836579572446555e-05, "loss": 0.1788, "step": 4300, "task_loss": 0.0036827102303504944 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1649906486272812, "epoch": 2.05, "learning_rate": 1.1817577197149645e-05, "loss": 0.1344, "step": 4310, "task_loss": 0.07797713577747345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.36799219250679016, "epoch": 2.05, "learning_rate": 1.1798574821852733e-05, "loss": 0.1798, "step": 4320, "task_loss": 0.19989198446273804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2834874391555786, "epoch": 2.06, "learning_rate": 1.177957244655582e-05, "loss": 0.0852, "step": 4330, "task_loss": 0.11706365644931793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4932941794395447, "epoch": 2.06, "learning_rate": 1.1760570071258908e-05, "loss": 0.3161, "step": 4340, "task_loss": 0.25535914301872253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4178961515426636, "epoch": 2.07, "learning_rate": 1.1741567695961998e-05, "loss": 0.2081, "step": 4350, "task_loss": 0.10093335807323456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3657403588294983, "epoch": 2.07, "learning_rate": 1.1722565320665084e-05, "loss": 0.2158, "step": 4360, "task_loss": 0.25138112902641296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02273857593536377, "epoch": 2.08, "learning_rate": 1.1703562945368172e-05, "loss": 0.1983, "step": 4370, "task_loss": 0.006311226636171341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17630484700202942, "epoch": 2.08, "learning_rate": 1.168456057007126e-05, "loss": 0.2566, "step": 4380, "task_loss": 0.505398154258728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.031373463571071625, "epoch": 2.09, "learning_rate": 1.1665558194774346e-05, "loss": 0.2201, "step": 4390, "task_loss": 0.002458591014146805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.33634716272354126, "epoch": 2.09, "learning_rate": 1.1646555819477436e-05, "loss": 0.1559, "step": 4400, "task_loss": 0.2244289070367813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2994489073753357, "epoch": 2.1, "learning_rate": 1.1627553444180523e-05, "loss": 0.248, "step": 4410, "task_loss": 0.2632516026496887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.060996197164058685, "epoch": 2.1, "learning_rate": 1.1608551068883611e-05, "loss": 0.1764, "step": 4420, "task_loss": 0.022328753024339676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0799054354429245, "epoch": 2.1, "learning_rate": 1.15895486935867e-05, "loss": 0.2472, "step": 4430, "task_loss": 0.028038904070854187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4037141799926758, "epoch": 2.11, "learning_rate": 1.1570546318289789e-05, "loss": 0.2217, "step": 4440, "task_loss": 0.40060561895370483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.29084426164627075, "epoch": 2.11, "learning_rate": 1.1551543942992875e-05, "loss": 0.2478, "step": 4450, "task_loss": 0.22699187695980072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.022717095911502838, "epoch": 2.12, "learning_rate": 1.1532541567695963e-05, "loss": 0.1458, "step": 4460, "task_loss": 0.007930740714073181 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05607360601425171, "epoch": 2.12, "learning_rate": 1.151353919239905e-05, "loss": 0.2155, "step": 4470, "task_loss": 0.23636063933372498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0458001047372818, "epoch": 2.13, "learning_rate": 1.1494536817102138e-05, "loss": 0.2193, "step": 4480, "task_loss": 0.03247682377696037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.27424195408821106, "epoch": 2.13, "learning_rate": 1.1475534441805228e-05, "loss": 0.2339, "step": 4490, "task_loss": 0.16137200593948364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.33258911967277527, "epoch": 2.14, "learning_rate": 1.1456532066508314e-05, "loss": 0.2689, "step": 4500, "task_loss": 0.34519919753074646 }, { "epoch": 2.14, "eval_accuracy": 0.9048165137614679, "eval_loss": 0.3319544792175293, "eval_runtime": 22.0326, "eval_samples_per_second": 39.578, "eval_steps_per_second": 4.947, "step": 4500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06233971193432808, "epoch": 2.14, "learning_rate": 1.1437529691211402e-05, "loss": 0.2959, "step": 4510, "task_loss": 0.008978258818387985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19504857063293457, "epoch": 2.15, "learning_rate": 1.141852731591449e-05, "loss": 0.16, "step": 4520, "task_loss": 0.12643758952617645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02659631334245205, "epoch": 2.15, "learning_rate": 1.139952494061758e-05, "loss": 0.2226, "step": 4530, "task_loss": 0.003300584852695465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05742825195193291, "epoch": 2.16, "learning_rate": 1.1380522565320666e-05, "loss": 0.1707, "step": 4540, "task_loss": 0.27017584443092346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.197635218501091, "epoch": 2.16, "learning_rate": 1.1361520190023754e-05, "loss": 0.2152, "step": 4550, "task_loss": 0.17246709764003754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13968348503112793, "epoch": 2.17, "learning_rate": 1.1342517814726841e-05, "loss": 0.2187, "step": 4560, "task_loss": 0.3724798858165741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2040019929409027, "epoch": 2.17, "learning_rate": 1.1323515439429931e-05, "loss": 0.1535, "step": 4570, "task_loss": 0.049664292484521866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.570436954498291, "epoch": 2.18, "learning_rate": 1.1304513064133019e-05, "loss": 0.1904, "step": 4580, "task_loss": 0.3329172134399414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09249302744865417, "epoch": 2.18, "learning_rate": 1.1285510688836105e-05, "loss": 0.1101, "step": 4590, "task_loss": 0.21237826347351074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03505774587392807, "epoch": 2.19, "learning_rate": 1.1266508313539193e-05, "loss": 0.1319, "step": 4600, "task_loss": 0.4041575789451599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3652401864528656, "epoch": 2.19, "learning_rate": 1.124750593824228e-05, "loss": 0.1775, "step": 4610, "task_loss": 0.18814264237880707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16492074728012085, "epoch": 2.19, "learning_rate": 1.122850356294537e-05, "loss": 0.2287, "step": 4620, "task_loss": 0.16214123368263245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17321833968162537, "epoch": 2.2, "learning_rate": 1.1209501187648456e-05, "loss": 0.1745, "step": 4630, "task_loss": 0.1337524801492691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04696015268564224, "epoch": 2.2, "learning_rate": 1.1190498812351544e-05, "loss": 0.1773, "step": 4640, "task_loss": 0.011036917567253113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3878232538700104, "epoch": 2.21, "learning_rate": 1.1171496437054632e-05, "loss": 0.2254, "step": 4650, "task_loss": 0.28115496039390564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09996585547924042, "epoch": 2.21, "learning_rate": 1.1152494061757722e-05, "loss": 0.1769, "step": 4660, "task_loss": 0.1222931444644928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4124844968318939, "epoch": 2.22, "learning_rate": 1.113349168646081e-05, "loss": 0.2081, "step": 4670, "task_loss": 0.3749869763851166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13545894622802734, "epoch": 2.22, "learning_rate": 1.1114489311163896e-05, "loss": 0.0857, "step": 4680, "task_loss": 0.05755548179149628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1826428771018982, "epoch": 2.23, "learning_rate": 1.1095486935866984e-05, "loss": 0.2862, "step": 4690, "task_loss": 0.01588393747806549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1444108933210373, "epoch": 2.23, "learning_rate": 1.1076484560570073e-05, "loss": 0.1427, "step": 4700, "task_loss": 0.26151856780052185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1410968005657196, "epoch": 2.24, "learning_rate": 1.1057482185273161e-05, "loss": 0.1974, "step": 4710, "task_loss": 0.07411689311265945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1891230046749115, "epoch": 2.24, "learning_rate": 1.1038479809976247e-05, "loss": 0.1835, "step": 4720, "task_loss": 0.11419402062892914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10753890872001648, "epoch": 2.25, "learning_rate": 1.1019477434679335e-05, "loss": 0.201, "step": 4730, "task_loss": 0.05918397009372711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12443285435438156, "epoch": 2.25, "learning_rate": 1.1000475059382423e-05, "loss": 0.1185, "step": 4740, "task_loss": 0.06015586480498314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.39146068692207336, "epoch": 2.26, "learning_rate": 1.0981472684085512e-05, "loss": 0.1775, "step": 4750, "task_loss": 0.406578928232193 }, { "epoch": 2.26, "eval_accuracy": 0.9162844036697247, "eval_loss": 0.28384512662887573, "eval_runtime": 22.1283, "eval_samples_per_second": 39.407, "eval_steps_per_second": 4.926, "step": 4750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.019335411489009857, "epoch": 2.26, "learning_rate": 1.09624703087886e-05, "loss": 0.166, "step": 4760, "task_loss": 0.16980049014091492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05398799106478691, "epoch": 2.27, "learning_rate": 1.0943467933491686e-05, "loss": 0.1554, "step": 4770, "task_loss": 0.007133938372135162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018566645681858063, "epoch": 2.27, "learning_rate": 1.0924465558194774e-05, "loss": 0.2266, "step": 4780, "task_loss": 0.005496695637702942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09009189158678055, "epoch": 2.28, "learning_rate": 1.0905463182897864e-05, "loss": 0.1098, "step": 4790, "task_loss": 0.04330487921833992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.29310229420661926, "epoch": 2.28, "learning_rate": 1.0886460807600952e-05, "loss": 0.2037, "step": 4800, "task_loss": 0.21032559871673584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.32484009861946106, "epoch": 2.29, "learning_rate": 1.086745843230404e-05, "loss": 0.1728, "step": 4810, "task_loss": 0.23763622343540192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.32907170057296753, "epoch": 2.29, "learning_rate": 1.0848456057007126e-05, "loss": 0.1479, "step": 4820, "task_loss": 0.27866876125335693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015447848476469517, "epoch": 2.29, "learning_rate": 1.0829453681710214e-05, "loss": 0.1612, "step": 4830, "task_loss": 0.003272462636232376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07381051778793335, "epoch": 2.3, "learning_rate": 1.0810451306413303e-05, "loss": 0.0796, "step": 4840, "task_loss": 0.13187864422798157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11486229300498962, "epoch": 2.3, "learning_rate": 1.0791448931116391e-05, "loss": 0.1633, "step": 4850, "task_loss": 0.17579086124897003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2775013744831085, "epoch": 2.31, "learning_rate": 1.0772446555819477e-05, "loss": 0.1521, "step": 4860, "task_loss": 0.34235501289367676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07822778820991516, "epoch": 2.31, "learning_rate": 1.0753444180522565e-05, "loss": 0.1217, "step": 4870, "task_loss": 0.017286375164985657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08122527599334717, "epoch": 2.32, "learning_rate": 1.0734441805225655e-05, "loss": 0.1756, "step": 4880, "task_loss": 0.24036413431167603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.8243784308433533, "epoch": 2.32, "learning_rate": 1.0715439429928743e-05, "loss": 0.2231, "step": 4890, "task_loss": 0.5445951819419861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.061645179986953735, "epoch": 2.33, "learning_rate": 1.069643705463183e-05, "loss": 0.1871, "step": 4900, "task_loss": 0.023713212460279465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11187595874071121, "epoch": 2.33, "learning_rate": 1.0677434679334917e-05, "loss": 0.1132, "step": 4910, "task_loss": 0.023900482803583145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026393216103315353, "epoch": 2.34, "learning_rate": 1.0658432304038006e-05, "loss": 0.1716, "step": 4920, "task_loss": 0.1366603672504425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01934751495718956, "epoch": 2.34, "learning_rate": 1.0639429928741094e-05, "loss": 0.1488, "step": 4930, "task_loss": 0.004630662500858307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.019772972911596298, "epoch": 2.35, "learning_rate": 1.0620427553444182e-05, "loss": 0.123, "step": 4940, "task_loss": 0.004192207008600235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07974082976579666, "epoch": 2.35, "learning_rate": 1.0601425178147268e-05, "loss": 0.2069, "step": 4950, "task_loss": 0.02976549044251442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4843176603317261, "epoch": 2.36, "learning_rate": 1.0582422802850356e-05, "loss": 0.2589, "step": 4960, "task_loss": 0.5428990125656128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11596342921257019, "epoch": 2.36, "learning_rate": 1.0563420427553445e-05, "loss": 0.1505, "step": 4970, "task_loss": 0.40985506772994995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.31906628608703613, "epoch": 2.37, "learning_rate": 1.0544418052256533e-05, "loss": 0.2339, "step": 4980, "task_loss": 0.16161520779132843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08732592314481735, "epoch": 2.37, "learning_rate": 1.0525415676959621e-05, "loss": 0.1856, "step": 4990, "task_loss": 0.03401995077729225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16932491958141327, "epoch": 2.38, "learning_rate": 1.0506413301662707e-05, "loss": 0.1648, "step": 5000, "task_loss": 0.1186380535364151 }, { "epoch": 2.38, "eval_accuracy": 0.9128440366972477, "eval_loss": 0.2842116057872772, "eval_runtime": 22.1968, "eval_samples_per_second": 39.285, "eval_steps_per_second": 4.911, "step": 5000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4252215325832367, "epoch": 2.38, "learning_rate": 1.0487410926365797e-05, "loss": 0.1908, "step": 5010, "task_loss": 0.26106417179107666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.4758025109767914, "epoch": 2.38, "learning_rate": 1.0468408551068885e-05, "loss": 0.2279, "step": 5020, "task_loss": 0.26795437932014465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11485397070646286, "epoch": 2.39, "learning_rate": 1.0449406175771973e-05, "loss": 0.1344, "step": 5030, "task_loss": 0.10242946445941925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07752517610788345, "epoch": 2.39, "learning_rate": 1.0430403800475059e-05, "loss": 0.1472, "step": 5040, "task_loss": 0.23853403329849243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23056337237358093, "epoch": 2.4, "learning_rate": 1.041140142517815e-05, "loss": 0.111, "step": 5050, "task_loss": 0.1433607041835785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.46395134925842285, "epoch": 2.4, "learning_rate": 1.0392399049881236e-05, "loss": 0.1743, "step": 5060, "task_loss": 0.2823677659034729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16598868370056152, "epoch": 2.41, "learning_rate": 1.0373396674584324e-05, "loss": 0.1343, "step": 5070, "task_loss": 0.03318723663687706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016996556892991066, "epoch": 2.41, "learning_rate": 1.0354394299287412e-05, "loss": 0.1427, "step": 5080, "task_loss": 0.006868541240692139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.6260548830032349, "epoch": 2.42, "learning_rate": 1.0335391923990498e-05, "loss": 0.2121, "step": 5090, "task_loss": 0.34184902906417847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0207875557243824, "epoch": 2.42, "learning_rate": 1.0316389548693588e-05, "loss": 0.1736, "step": 5100, "task_loss": 0.10779790580272675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11162707954645157, "epoch": 2.43, "learning_rate": 1.0297387173396676e-05, "loss": 0.1683, "step": 5110, "task_loss": 0.17393611371517181 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10660862922668457, "epoch": 2.43, "learning_rate": 1.0278384798099763e-05, "loss": 0.2287, "step": 5120, "task_loss": 0.013046719133853912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11832073330879211, "epoch": 2.44, "learning_rate": 1.0259382422802851e-05, "loss": 0.1859, "step": 5130, "task_loss": 0.03974215313792229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19762274622917175, "epoch": 2.44, "learning_rate": 1.024038004750594e-05, "loss": 0.158, "step": 5140, "task_loss": 0.09981634467840195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.22084400057792664, "epoch": 2.45, "learning_rate": 1.0221377672209027e-05, "loss": 0.1586, "step": 5150, "task_loss": 0.10561450570821762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.42767268419265747, "epoch": 2.45, "learning_rate": 1.0202375296912115e-05, "loss": 0.1494, "step": 5160, "task_loss": 0.2538478970527649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03990669548511505, "epoch": 2.46, "learning_rate": 1.0183372921615203e-05, "loss": 0.1552, "step": 5170, "task_loss": 0.012294076383113861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0684843510389328, "epoch": 2.46, "learning_rate": 1.0164370546318289e-05, "loss": 0.1114, "step": 5180, "task_loss": 0.04257909581065178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10727253556251526, "epoch": 2.47, "learning_rate": 1.0145368171021378e-05, "loss": 0.1408, "step": 5190, "task_loss": 0.0835094004869461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1803852915763855, "epoch": 2.47, "learning_rate": 1.0126365795724466e-05, "loss": 0.1463, "step": 5200, "task_loss": 0.10350771993398666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.21467408537864685, "epoch": 2.48, "learning_rate": 1.0107363420427554e-05, "loss": 0.1776, "step": 5210, "task_loss": 0.12939737737178802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.49049556255340576, "epoch": 2.48, "learning_rate": 1.0088361045130642e-05, "loss": 0.187, "step": 5220, "task_loss": 0.4802893400192261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.18884505331516266, "epoch": 2.48, "learning_rate": 1.0069358669833732e-05, "loss": 0.1488, "step": 5230, "task_loss": 0.09134702384471893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17404451966285706, "epoch": 2.49, "learning_rate": 1.0050356294536818e-05, "loss": 0.1596, "step": 5240, "task_loss": 0.08957971632480621 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012248549610376358, "epoch": 2.49, "learning_rate": 1.0031353919239906e-05, "loss": 0.1316, "step": 5250, "task_loss": 0.004954520612955093 }, { "epoch": 2.49, "eval_accuracy": 0.9162844036697247, "eval_loss": 0.2750292420387268, "eval_runtime": 22.048, "eval_samples_per_second": 39.55, "eval_steps_per_second": 4.944, "step": 5250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08270278573036194, "epoch": 2.5, "learning_rate": 1.0012351543942993e-05, "loss": 0.1146, "step": 5260, "task_loss": 0.04688364639878273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10440254956483841, "epoch": 2.5, "learning_rate": 9.993349168646081e-06, "loss": 0.1711, "step": 5270, "task_loss": 0.06673218309879303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.34925076365470886, "epoch": 2.51, "learning_rate": 9.97434679334917e-06, "loss": 0.2062, "step": 5280, "task_loss": 0.16814163327217102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.44582313299179077, "epoch": 2.51, "learning_rate": 9.955344418052257e-06, "loss": 0.2331, "step": 5290, "task_loss": 0.35081130266189575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02439703233540058, "epoch": 2.52, "learning_rate": 9.936342042755345e-06, "loss": 0.1357, "step": 5300, "task_loss": 0.003609389066696167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0913199633359909, "epoch": 2.52, "learning_rate": 9.917339667458433e-06, "loss": 0.1696, "step": 5310, "task_loss": 0.3426245152950287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02632327377796173, "epoch": 2.53, "learning_rate": 9.89833729216152e-06, "loss": 0.1167, "step": 5320, "task_loss": 0.005381196737289429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1984187364578247, "epoch": 2.53, "learning_rate": 9.879334916864608e-06, "loss": 0.1597, "step": 5330, "task_loss": 0.12277568876743317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16098986566066742, "epoch": 2.54, "learning_rate": 9.860332541567696e-06, "loss": 0.1252, "step": 5340, "task_loss": 0.081682950258255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.5197485685348511, "epoch": 2.54, "learning_rate": 9.841330166270784e-06, "loss": 0.1671, "step": 5350, "task_loss": 0.3930453062057495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0463409461081028, "epoch": 2.55, "learning_rate": 9.822327790973872e-06, "loss": 0.1617, "step": 5360, "task_loss": 0.01061389222741127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.042029425501823425, "epoch": 2.55, "learning_rate": 9.803325415676962e-06, "loss": 0.1519, "step": 5370, "task_loss": 0.027968235313892365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3101033866405487, "epoch": 2.56, "learning_rate": 9.784323040380048e-06, "loss": 0.193, "step": 5380, "task_loss": 0.16425858438014984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.7263270020484924, "epoch": 2.56, "learning_rate": 9.765320665083137e-06, "loss": 0.2224, "step": 5390, "task_loss": 0.7286182641983032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.040765728801488876, "epoch": 2.57, "learning_rate": 9.746318289786224e-06, "loss": 0.1389, "step": 5400, "task_loss": 0.011140488088130951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1607925444841385, "epoch": 2.57, "learning_rate": 9.727315914489311e-06, "loss": 0.1126, "step": 5410, "task_loss": 0.11116501688957214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0224788635969162, "epoch": 2.57, "learning_rate": 9.7083135391924e-06, "loss": 0.1352, "step": 5420, "task_loss": 0.005074281245470047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0271650031208992, "epoch": 2.58, "learning_rate": 9.689311163895487e-06, "loss": 0.105, "step": 5430, "task_loss": 0.012768540531396866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02819114923477173, "epoch": 2.58, "learning_rate": 9.670308788598575e-06, "loss": 0.2538, "step": 5440, "task_loss": 0.00484645739197731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.18920451402664185, "epoch": 2.59, "learning_rate": 9.651306413301663e-06, "loss": 0.1027, "step": 5450, "task_loss": 0.24752211570739746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11157466471195221, "epoch": 2.59, "learning_rate": 9.632304038004752e-06, "loss": 0.162, "step": 5460, "task_loss": 0.1878063678741455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.26885926723480225, "epoch": 2.6, "learning_rate": 9.613301662707839e-06, "loss": 0.2035, "step": 5470, "task_loss": 0.058163829147815704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1357247531414032, "epoch": 2.6, "learning_rate": 9.594299287410928e-06, "loss": 0.2073, "step": 5480, "task_loss": 0.1269286572933197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05651300773024559, "epoch": 2.61, "learning_rate": 9.575296912114014e-06, "loss": 0.1478, "step": 5490, "task_loss": 0.014481060206890106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11179080605506897, "epoch": 2.61, "learning_rate": 9.556294536817104e-06, "loss": 0.2349, "step": 5500, "task_loss": 0.13668608665466309 }, { "epoch": 2.61, "eval_accuracy": 0.9231651376146789, "eval_loss": 0.24054142832756042, "eval_runtime": 22.021, "eval_samples_per_second": 39.599, "eval_steps_per_second": 4.95, "step": 5500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02737560123205185, "epoch": 2.62, "learning_rate": 9.53729216152019e-06, "loss": 0.1493, "step": 5510, "task_loss": 0.10180087387561798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12310200929641724, "epoch": 2.62, "learning_rate": 9.518289786223278e-06, "loss": 0.1098, "step": 5520, "task_loss": 0.058783046901226044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13475853204727173, "epoch": 2.63, "learning_rate": 9.499287410926367e-06, "loss": 0.1139, "step": 5530, "task_loss": 0.05447866767644882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17890390753746033, "epoch": 2.63, "learning_rate": 9.480285035629454e-06, "loss": 0.1686, "step": 5540, "task_loss": 0.22207100689411163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05488044023513794, "epoch": 2.64, "learning_rate": 9.461282660332543e-06, "loss": 0.1037, "step": 5550, "task_loss": 0.2198052704334259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07350677251815796, "epoch": 2.64, "learning_rate": 9.44228028503563e-06, "loss": 0.1472, "step": 5560, "task_loss": 0.15250588953495026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23793400824069977, "epoch": 2.65, "learning_rate": 9.423277909738719e-06, "loss": 0.187, "step": 5570, "task_loss": 0.17216888070106506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3636675775051117, "epoch": 2.65, "learning_rate": 9.404275534441805e-06, "loss": 0.1792, "step": 5580, "task_loss": 0.21685606241226196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05343058705329895, "epoch": 2.66, "learning_rate": 9.385273159144895e-06, "loss": 0.1529, "step": 5590, "task_loss": 0.012824393808841705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.21820136904716492, "epoch": 2.66, "learning_rate": 9.36627078384798e-06, "loss": 0.2066, "step": 5600, "task_loss": 0.10303452610969543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02510383166372776, "epoch": 2.67, "learning_rate": 9.34726840855107e-06, "loss": 0.0728, "step": 5610, "task_loss": 0.004608385264873505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2674543261528015, "epoch": 2.67, "learning_rate": 9.328266033254158e-06, "loss": 0.146, "step": 5620, "task_loss": 0.2036900818347931 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2999550402164459, "epoch": 2.67, "learning_rate": 9.309263657957246e-06, "loss": 0.1655, "step": 5630, "task_loss": 0.1722106784582138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08812573552131653, "epoch": 2.68, "learning_rate": 9.290261282660334e-06, "loss": 0.1105, "step": 5640, "task_loss": 0.041734665632247925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014322986826300621, "epoch": 2.68, "learning_rate": 9.27125890736342e-06, "loss": 0.0986, "step": 5650, "task_loss": 0.11941255629062653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12577423453330994, "epoch": 2.69, "learning_rate": 9.25225653206651e-06, "loss": 0.1373, "step": 5660, "task_loss": 0.3789353370666504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0945727676153183, "epoch": 2.69, "learning_rate": 9.233254156769596e-06, "loss": 0.1489, "step": 5670, "task_loss": 0.14805974066257477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.27903738617897034, "epoch": 2.7, "learning_rate": 9.214251781472685e-06, "loss": 0.1515, "step": 5680, "task_loss": 0.14870142936706543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1626128852367401, "epoch": 2.7, "learning_rate": 9.195249406175773e-06, "loss": 0.149, "step": 5690, "task_loss": 0.08122530579566956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.055356357246637344, "epoch": 2.71, "learning_rate": 9.176247030878861e-06, "loss": 0.1217, "step": 5700, "task_loss": 0.16854631900787354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1662861853837967, "epoch": 2.71, "learning_rate": 9.157244655581949e-06, "loss": 0.1675, "step": 5710, "task_loss": 0.013219501823186874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.20596551895141602, "epoch": 2.72, "learning_rate": 9.138242280285037e-06, "loss": 0.2061, "step": 5720, "task_loss": 0.1961277425289154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2168876677751541, "epoch": 2.72, "learning_rate": 9.119239904988125e-06, "loss": 0.1496, "step": 5730, "task_loss": 0.2229800522327423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.044798918068408966, "epoch": 2.73, "learning_rate": 9.100237529691213e-06, "loss": 0.114, "step": 5740, "task_loss": 0.005338538438081741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13803905248641968, "epoch": 2.73, "learning_rate": 9.0812351543943e-06, "loss": 0.066, "step": 5750, "task_loss": 0.06024959683418274 }, { "epoch": 2.73, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.26952359080314636, "eval_runtime": 22.0206, "eval_samples_per_second": 39.599, "eval_steps_per_second": 4.95, "step": 5750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.009917501360177994, "epoch": 2.74, "learning_rate": 9.062232779097387e-06, "loss": 0.1051, "step": 5760, "task_loss": 0.0061325803399086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08573028445243835, "epoch": 2.74, "learning_rate": 9.043230403800476e-06, "loss": 0.1055, "step": 5770, "task_loss": 0.05740174278616905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07769911736249924, "epoch": 2.75, "learning_rate": 9.024228028503564e-06, "loss": 0.1586, "step": 5780, "task_loss": 0.019947297871112823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04432743787765503, "epoch": 2.75, "learning_rate": 9.005225653206652e-06, "loss": 0.1275, "step": 5790, "task_loss": 0.17999230325222015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1737172156572342, "epoch": 2.76, "learning_rate": 8.98622327790974e-06, "loss": 0.2693, "step": 5800, "task_loss": 0.18654870986938477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021758923307061195, "epoch": 2.76, "learning_rate": 8.967220902612828e-06, "loss": 0.119, "step": 5810, "task_loss": 0.0038488097488880157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.011074014008045197, "epoch": 2.76, "learning_rate": 8.948218527315915e-06, "loss": 0.1163, "step": 5820, "task_loss": 0.0033141709864139557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23885256052017212, "epoch": 2.77, "learning_rate": 8.929216152019003e-06, "loss": 0.1698, "step": 5830, "task_loss": 0.5643174052238464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.046549778431653976, "epoch": 2.77, "learning_rate": 8.910213776722091e-06, "loss": 0.0801, "step": 5840, "task_loss": 0.00456850603222847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09816709160804749, "epoch": 2.78, "learning_rate": 8.891211401425179e-06, "loss": 0.1631, "step": 5850, "task_loss": 0.12905465066432953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08407769352197647, "epoch": 2.78, "learning_rate": 8.872209026128267e-06, "loss": 0.1879, "step": 5860, "task_loss": 0.03167784959077835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2762501537799835, "epoch": 2.79, "learning_rate": 8.853206650831355e-06, "loss": 0.1614, "step": 5870, "task_loss": 0.16979815065860748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12890750169754028, "epoch": 2.79, "learning_rate": 8.834204275534443e-06, "loss": 0.1169, "step": 5880, "task_loss": 0.04023109748959541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02156493254005909, "epoch": 2.8, "learning_rate": 8.81520190023753e-06, "loss": 0.0952, "step": 5890, "task_loss": 0.0034607164561748505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.007742735557258129, "epoch": 2.8, "learning_rate": 8.796199524940618e-06, "loss": 0.1524, "step": 5900, "task_loss": 0.0028364397585392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1480652093887329, "epoch": 2.81, "learning_rate": 8.777197149643706e-06, "loss": 0.1092, "step": 5910, "task_loss": 0.29450657963752747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17767339944839478, "epoch": 2.81, "learning_rate": 8.758194774346794e-06, "loss": 0.2108, "step": 5920, "task_loss": 0.10835998505353928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.26279133558273315, "epoch": 2.82, "learning_rate": 8.739192399049882e-06, "loss": 0.169, "step": 5930, "task_loss": 0.23328514397144318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015537131577730179, "epoch": 2.82, "learning_rate": 8.72019002375297e-06, "loss": 0.1353, "step": 5940, "task_loss": 0.02001919597387314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07597079128026962, "epoch": 2.83, "learning_rate": 8.701187648456058e-06, "loss": 0.0961, "step": 5950, "task_loss": 0.11477569490671158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11929579079151154, "epoch": 2.83, "learning_rate": 8.682185273159146e-06, "loss": 0.1904, "step": 5960, "task_loss": 0.04400225356221199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.24032776057720184, "epoch": 2.84, "learning_rate": 8.663182897862233e-06, "loss": 0.1713, "step": 5970, "task_loss": 0.16706398129463196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.039569027721881866, "epoch": 2.84, "learning_rate": 8.644180522565321e-06, "loss": 0.079, "step": 5980, "task_loss": 0.009293723851442337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.14112095534801483, "epoch": 2.85, "learning_rate": 8.625178147268409e-06, "loss": 0.1912, "step": 5990, "task_loss": 0.12100718915462494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2663877308368683, "epoch": 2.85, "learning_rate": 8.606175771971497e-06, "loss": 0.1285, "step": 6000, "task_loss": 0.1642446219921112 }, { "epoch": 2.85, "eval_accuracy": 0.9094036697247706, "eval_loss": 0.3016970455646515, "eval_runtime": 21.9287, "eval_samples_per_second": 39.765, "eval_steps_per_second": 4.971, "step": 6000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02504299208521843, "epoch": 2.86, "learning_rate": 8.587173396674585e-06, "loss": 0.0738, "step": 6010, "task_loss": 0.004753179848194122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05768204480409622, "epoch": 2.86, "learning_rate": 8.570071258907364e-06, "loss": 0.1474, "step": 6020, "task_loss": 0.03982783481478691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04381980746984482, "epoch": 2.86, "learning_rate": 8.551068883610452e-06, "loss": 0.0811, "step": 6030, "task_loss": 0.014950472861528397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10156150907278061, "epoch": 2.87, "learning_rate": 8.53206650831354e-06, "loss": 0.0831, "step": 6040, "task_loss": 0.10873029381036758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07243537902832031, "epoch": 2.87, "learning_rate": 8.513064133016627e-06, "loss": 0.146, "step": 6050, "task_loss": 0.029440071433782578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.27121058106422424, "epoch": 2.88, "learning_rate": 8.494061757719715e-06, "loss": 0.1812, "step": 6060, "task_loss": 0.30982720851898193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23871323466300964, "epoch": 2.88, "learning_rate": 8.475059382422803e-06, "loss": 0.1833, "step": 6070, "task_loss": 0.17489267885684967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12207458168268204, "epoch": 2.89, "learning_rate": 8.456057007125893e-06, "loss": 0.1351, "step": 6080, "task_loss": 0.008778557181358337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09064328670501709, "epoch": 2.89, "learning_rate": 8.437054631828979e-06, "loss": 0.0597, "step": 6090, "task_loss": 0.026089288294315338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.28841766715049744, "epoch": 2.9, "learning_rate": 8.418052256532068e-06, "loss": 0.162, "step": 6100, "task_loss": 0.1540592610836029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.049679264426231384, "epoch": 2.9, "learning_rate": 8.399049881235155e-06, "loss": 0.0938, "step": 6110, "task_loss": 0.012325655668973923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.031312961131334305, "epoch": 2.91, "learning_rate": 8.380047505938242e-06, "loss": 0.1657, "step": 6120, "task_loss": 0.003661230206489563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0725235864520073, "epoch": 2.91, "learning_rate": 8.36104513064133e-06, "loss": 0.142, "step": 6130, "task_loss": 0.029958881437778473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19128333032131195, "epoch": 2.92, "learning_rate": 8.342042755344418e-06, "loss": 0.1463, "step": 6140, "task_loss": 0.11310219764709473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024800153449177742, "epoch": 2.92, "learning_rate": 8.323040380047506e-06, "loss": 0.1464, "step": 6150, "task_loss": 0.007242865860462189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0960264801979065, "epoch": 2.93, "learning_rate": 8.304038004750594e-06, "loss": 0.1254, "step": 6160, "task_loss": 0.10413940250873566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.029338371008634567, "epoch": 2.93, "learning_rate": 8.285035629453683e-06, "loss": 0.1295, "step": 6170, "task_loss": 0.010850280523300171 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.40849167108535767, "epoch": 2.94, "learning_rate": 8.26603325415677e-06, "loss": 0.1832, "step": 6180, "task_loss": 0.32399341464042664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017780784517526627, "epoch": 2.94, "learning_rate": 8.247030878859859e-06, "loss": 0.1762, "step": 6190, "task_loss": 0.004788093268871307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.20082998275756836, "epoch": 2.95, "learning_rate": 8.228028503562945e-06, "loss": 0.1738, "step": 6200, "task_loss": 0.2769722044467926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09029912948608398, "epoch": 2.95, "learning_rate": 8.209026128266035e-06, "loss": 0.1472, "step": 6210, "task_loss": 0.22967661917209625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03564874455332756, "epoch": 2.95, "learning_rate": 8.190023752969121e-06, "loss": 0.1332, "step": 6220, "task_loss": 0.011339064687490463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06550465524196625, "epoch": 2.96, "learning_rate": 8.171021377672209e-06, "loss": 0.0727, "step": 6230, "task_loss": 0.03282541409134865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021643269807100296, "epoch": 2.96, "learning_rate": 8.152019002375298e-06, "loss": 0.1414, "step": 6240, "task_loss": 0.00811653584241867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2023080289363861, "epoch": 2.97, "learning_rate": 8.133016627078385e-06, "loss": 0.1813, "step": 6250, "task_loss": 0.1566222906112671 }, { "epoch": 2.97, "eval_accuracy": 0.9105504587155964, "eval_loss": 0.347153902053833, "eval_runtime": 22.058, "eval_samples_per_second": 39.532, "eval_steps_per_second": 4.942, "step": 6250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.40393316745758057, "epoch": 2.97, "learning_rate": 8.114014251781474e-06, "loss": 0.2437, "step": 6260, "task_loss": 0.2824239134788513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04513658583164215, "epoch": 2.98, "learning_rate": 8.09501187648456e-06, "loss": 0.1042, "step": 6270, "task_loss": 0.007071588188409805 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01414411049336195, "epoch": 2.98, "learning_rate": 8.07600950118765e-06, "loss": 0.0815, "step": 6280, "task_loss": 0.010898426175117493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.27671492099761963, "epoch": 2.99, "learning_rate": 8.057007125890736e-06, "loss": 0.1583, "step": 6290, "task_loss": 0.4247187376022339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04388013109564781, "epoch": 2.99, "learning_rate": 8.038004750593826e-06, "loss": 0.1534, "step": 6300, "task_loss": 0.009620524942874908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04090433940291405, "epoch": 3.0, "learning_rate": 8.019002375296912e-06, "loss": 0.1315, "step": 6310, "task_loss": 0.004594910889863968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026270296424627304, "epoch": 3.0, "learning_rate": 8.000000000000001e-06, "loss": 0.1069, "step": 6320, "task_loss": 0.0037337057292461395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16657724976539612, "epoch": 3.01, "learning_rate": 7.98099762470309e-06, "loss": 0.0828, "step": 6330, "task_loss": 0.17973756790161133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.022658517584204674, "epoch": 3.01, "learning_rate": 7.961995249406177e-06, "loss": 0.095, "step": 6340, "task_loss": 0.008285708725452423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03448961302638054, "epoch": 3.02, "learning_rate": 7.942992874109265e-06, "loss": 0.0677, "step": 6350, "task_loss": 0.02081955224275589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020961280912160873, "epoch": 3.02, "learning_rate": 7.923990498812351e-06, "loss": 0.0523, "step": 6360, "task_loss": 0.24772392213344574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09194082766771317, "epoch": 3.03, "learning_rate": 7.90498812351544e-06, "loss": 0.0895, "step": 6370, "task_loss": 0.3988223373889923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.15075421333312988, "epoch": 3.03, "learning_rate": 7.885985748218527e-06, "loss": 0.1081, "step": 6380, "task_loss": 0.06303508579730988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012541974894702435, "epoch": 3.04, "learning_rate": 7.866983372921616e-06, "loss": 0.0863, "step": 6390, "task_loss": 0.004561152309179306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021147001534700394, "epoch": 3.04, "learning_rate": 7.847980997624704e-06, "loss": 0.0674, "step": 6400, "task_loss": 0.0043773651123046875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016194012016057968, "epoch": 3.05, "learning_rate": 7.828978622327792e-06, "loss": 0.1634, "step": 6410, "task_loss": 0.0029691122472286224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04040956124663353, "epoch": 3.05, "learning_rate": 7.80997624703088e-06, "loss": 0.0932, "step": 6420, "task_loss": 0.006615336984395981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1302649974822998, "epoch": 3.05, "learning_rate": 7.790973871733968e-06, "loss": 0.0852, "step": 6430, "task_loss": 0.08950361609458923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026604020968079567, "epoch": 3.06, "learning_rate": 7.771971496437056e-06, "loss": 0.0547, "step": 6440, "task_loss": 0.19832749664783478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1625809371471405, "epoch": 3.06, "learning_rate": 7.752969121140144e-06, "loss": 0.0843, "step": 6450, "task_loss": 0.07263679802417755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3386070132255554, "epoch": 3.07, "learning_rate": 7.733966745843231e-06, "loss": 0.1032, "step": 6460, "task_loss": 0.3932771682739258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02958713099360466, "epoch": 3.07, "learning_rate": 7.714964370546318e-06, "loss": 0.0915, "step": 6470, "task_loss": 0.002492375671863556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03094402514398098, "epoch": 3.08, "learning_rate": 7.695961995249407e-06, "loss": 0.1083, "step": 6480, "task_loss": 0.006007764488458633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020859267562627792, "epoch": 3.08, "learning_rate": 7.676959619952495e-06, "loss": 0.0465, "step": 6490, "task_loss": 0.00389765202999115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17978915572166443, "epoch": 3.09, "learning_rate": 7.657957244655583e-06, "loss": 0.078, "step": 6500, "task_loss": 0.09985024482011795 }, { "epoch": 3.09, "eval_accuracy": 0.9139908256880734, "eval_loss": 0.2914510667324066, "eval_runtime": 22.4643, "eval_samples_per_second": 38.817, "eval_steps_per_second": 4.852, "step": 6500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07285353541374207, "epoch": 3.09, "learning_rate": 7.63895486935867e-06, "loss": 0.0599, "step": 6510, "task_loss": 0.21172893047332764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016873065382242203, "epoch": 3.1, "learning_rate": 7.619952494061759e-06, "loss": 0.0994, "step": 6520, "task_loss": 0.1662091165781021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.15915945172309875, "epoch": 3.1, "learning_rate": 7.600950118764846e-06, "loss": 0.1219, "step": 6530, "task_loss": 0.2617415487766266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16674910485744476, "epoch": 3.11, "learning_rate": 7.581947743467934e-06, "loss": 0.0909, "step": 6540, "task_loss": 0.12426068633794785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0346527174115181, "epoch": 3.11, "learning_rate": 7.562945368171022e-06, "loss": 0.0854, "step": 6550, "task_loss": 0.012002792209386826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08293355256319046, "epoch": 3.12, "learning_rate": 7.54394299287411e-06, "loss": 0.0816, "step": 6560, "task_loss": 0.08816082775592804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.054979730397462845, "epoch": 3.12, "learning_rate": 7.524940617577198e-06, "loss": 0.0739, "step": 6570, "task_loss": 0.5423688292503357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021432993933558464, "epoch": 3.13, "learning_rate": 7.505938242280285e-06, "loss": 0.0644, "step": 6580, "task_loss": 0.005201835185289383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11038654297590256, "epoch": 3.13, "learning_rate": 7.486935866983374e-06, "loss": 0.0679, "step": 6590, "task_loss": 0.10889068990945816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.011515101417899132, "epoch": 3.14, "learning_rate": 7.467933491686461e-06, "loss": 0.1239, "step": 6600, "task_loss": 0.0029702894389629364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.027960218489170074, "epoch": 3.14, "learning_rate": 7.448931116389549e-06, "loss": 0.0788, "step": 6610, "task_loss": 0.005036883056163788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018560441210865974, "epoch": 3.14, "learning_rate": 7.429928741092637e-06, "loss": 0.0824, "step": 6620, "task_loss": 0.009363945573568344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08277207612991333, "epoch": 3.15, "learning_rate": 7.410926365795725e-06, "loss": 0.145, "step": 6630, "task_loss": 0.23960661888122559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024465510621666908, "epoch": 3.15, "learning_rate": 7.391923990498813e-06, "loss": 0.0744, "step": 6640, "task_loss": 0.09361692517995834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.28370052576065063, "epoch": 3.16, "learning_rate": 7.372921615201901e-06, "loss": 0.0718, "step": 6650, "task_loss": 0.18601828813552856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1300351619720459, "epoch": 3.16, "learning_rate": 7.353919239904989e-06, "loss": 0.1288, "step": 6660, "task_loss": 0.06523597240447998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04163939133286476, "epoch": 3.17, "learning_rate": 7.334916864608077e-06, "loss": 0.0974, "step": 6670, "task_loss": 0.17136383056640625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018006887286901474, "epoch": 3.17, "learning_rate": 7.315914489311164e-06, "loss": 0.0903, "step": 6680, "task_loss": 0.005319155752658844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.24133111536502838, "epoch": 3.18, "learning_rate": 7.296912114014253e-06, "loss": 0.1113, "step": 6690, "task_loss": 0.2049761414527893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.18800024688243866, "epoch": 3.18, "learning_rate": 7.27790973871734e-06, "loss": 0.0828, "step": 6700, "task_loss": 0.10774320363998413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05912996828556061, "epoch": 3.19, "learning_rate": 7.258907363420428e-06, "loss": 0.1459, "step": 6710, "task_loss": 0.024521011859178543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.042738813906908035, "epoch": 3.19, "learning_rate": 7.239904988123516e-06, "loss": 0.0925, "step": 6720, "task_loss": 0.00665607675909996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08050870150327682, "epoch": 3.2, "learning_rate": 7.220902612826604e-06, "loss": 0.0643, "step": 6730, "task_loss": 0.22327980399131775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23609286546707153, "epoch": 3.2, "learning_rate": 7.201900237529692e-06, "loss": 0.1105, "step": 6740, "task_loss": 0.15705671906471252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.00854767207056284, "epoch": 3.21, "learning_rate": 7.1828978622327794e-06, "loss": 0.0886, "step": 6750, "task_loss": 0.003407653421163559 }, { "epoch": 3.21, "eval_accuracy": 0.9151376146788991, "eval_loss": 0.28525349497795105, "eval_runtime": 22.3514, "eval_samples_per_second": 39.013, "eval_steps_per_second": 4.877, "step": 6750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014828482642769814, "epoch": 3.21, "learning_rate": 7.163895486935868e-06, "loss": 0.0819, "step": 6760, "task_loss": 0.22838379442691803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05267515778541565, "epoch": 3.22, "learning_rate": 7.144893111638955e-06, "loss": 0.0694, "step": 6770, "task_loss": 0.17177042365074158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11514155566692352, "epoch": 3.22, "learning_rate": 7.125890736342044e-06, "loss": 0.1022, "step": 6780, "task_loss": 0.04417066648602486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10625005513429642, "epoch": 3.23, "learning_rate": 7.106888361045131e-06, "loss": 0.0949, "step": 6790, "task_loss": 0.06190282851457596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23833660781383514, "epoch": 3.23, "learning_rate": 7.08788598574822e-06, "loss": 0.1509, "step": 6800, "task_loss": 0.3238193392753601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.060429543256759644, "epoch": 3.24, "learning_rate": 7.068883610451307e-06, "loss": 0.0693, "step": 6810, "task_loss": 0.3467991352081299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05774849280714989, "epoch": 3.24, "learning_rate": 7.0498812351543945e-06, "loss": 0.0708, "step": 6820, "task_loss": 0.02291129156947136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018570270389318466, "epoch": 3.24, "learning_rate": 7.030878859857483e-06, "loss": 0.066, "step": 6830, "task_loss": 0.005066726356744766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06323438882827759, "epoch": 3.25, "learning_rate": 7.01187648456057e-06, "loss": 0.0893, "step": 6840, "task_loss": 0.04271535202860832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02259252592921257, "epoch": 3.25, "learning_rate": 6.992874109263659e-06, "loss": 0.0495, "step": 6850, "task_loss": 0.08561189472675323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03591727092862129, "epoch": 3.26, "learning_rate": 6.973871733966746e-06, "loss": 0.0788, "step": 6860, "task_loss": 0.005202621221542358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03306454047560692, "epoch": 3.26, "learning_rate": 6.954869358669835e-06, "loss": 0.1167, "step": 6870, "task_loss": 0.0036557093262672424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10268350690603256, "epoch": 3.27, "learning_rate": 6.935866983372922e-06, "loss": 0.0842, "step": 6880, "task_loss": 0.11851201951503754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03656313568353653, "epoch": 3.27, "learning_rate": 6.91686460807601e-06, "loss": 0.0654, "step": 6890, "task_loss": 0.009406276047229767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07688532024621964, "epoch": 3.28, "learning_rate": 6.897862232779098e-06, "loss": 0.0885, "step": 6900, "task_loss": 0.09606030583381653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021331768482923508, "epoch": 3.28, "learning_rate": 6.878859857482186e-06, "loss": 0.1026, "step": 6910, "task_loss": 0.003994014114141464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03601570054888725, "epoch": 3.29, "learning_rate": 6.859857482185274e-06, "loss": 0.1132, "step": 6920, "task_loss": 0.1425406038761139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.054355375468730927, "epoch": 3.29, "learning_rate": 6.840855106888361e-06, "loss": 0.1151, "step": 6930, "task_loss": 0.1992948204278946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026305008679628372, "epoch": 3.3, "learning_rate": 6.82185273159145e-06, "loss": 0.0999, "step": 6940, "task_loss": 0.008063357323408127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.14803074300289154, "epoch": 3.3, "learning_rate": 6.802850356294537e-06, "loss": 0.0867, "step": 6950, "task_loss": 0.21300971508026123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10545548796653748, "epoch": 3.31, "learning_rate": 6.783847980997625e-06, "loss": 0.0741, "step": 6960, "task_loss": 0.06187749281525612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2063077986240387, "epoch": 3.31, "learning_rate": 6.764845605700712e-06, "loss": 0.1095, "step": 6970, "task_loss": 0.27536579966545105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012730289250612259, "epoch": 3.32, "learning_rate": 6.745843230403801e-06, "loss": 0.1128, "step": 6980, "task_loss": 0.005988124758005142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024963170289993286, "epoch": 3.32, "learning_rate": 6.726840855106889e-06, "loss": 0.1089, "step": 6990, "task_loss": 0.006193935871124268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1125224307179451, "epoch": 3.33, "learning_rate": 6.707838479809977e-06, "loss": 0.117, "step": 7000, "task_loss": 0.25286245346069336 }, { "epoch": 3.33, "eval_accuracy": 0.9185779816513762, "eval_loss": 0.2689138948917389, "eval_runtime": 22.4717, "eval_samples_per_second": 38.804, "eval_steps_per_second": 4.851, "step": 7000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07371003925800323, "epoch": 3.33, "learning_rate": 6.688836104513065e-06, "loss": 0.0961, "step": 7010, "task_loss": 0.0624442957341671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08843027800321579, "epoch": 3.33, "learning_rate": 6.669833729216153e-06, "loss": 0.053, "step": 7020, "task_loss": 0.22307521104812622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01580330729484558, "epoch": 3.34, "learning_rate": 6.6508313539192404e-06, "loss": 0.0582, "step": 7030, "task_loss": 0.021607249975204468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3695874810218811, "epoch": 3.34, "learning_rate": 6.631828978622329e-06, "loss": 0.1369, "step": 7040, "task_loss": 0.4205509424209595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.025944426655769348, "epoch": 3.35, "learning_rate": 6.612826603325416e-06, "loss": 0.054, "step": 7050, "task_loss": 0.18624553084373474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.26268765330314636, "epoch": 3.35, "learning_rate": 6.593824228028504e-06, "loss": 0.1181, "step": 7060, "task_loss": 0.2383035570383072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018553506582975388, "epoch": 3.36, "learning_rate": 6.574821852731592e-06, "loss": 0.1264, "step": 7070, "task_loss": 0.002540022134780884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.30001404881477356, "epoch": 3.36, "learning_rate": 6.55581947743468e-06, "loss": 0.1015, "step": 7080, "task_loss": 0.4227195382118225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017469489946961403, "epoch": 3.37, "learning_rate": 6.536817102137768e-06, "loss": 0.0563, "step": 7090, "task_loss": 0.005384139716625214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2690996825695038, "epoch": 3.37, "learning_rate": 6.5178147268408555e-06, "loss": 0.0829, "step": 7100, "task_loss": 0.19411098957061768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012529075145721436, "epoch": 3.38, "learning_rate": 6.498812351543944e-06, "loss": 0.0957, "step": 7110, "task_loss": 0.00231257826089859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015447025187313557, "epoch": 3.38, "learning_rate": 6.479809976247031e-06, "loss": 0.0599, "step": 7120, "task_loss": 0.0031574219465255737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016959063708782196, "epoch": 3.39, "learning_rate": 6.46080760095012e-06, "loss": 0.0663, "step": 7130, "task_loss": 0.15422838926315308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015068082138895988, "epoch": 3.39, "learning_rate": 6.441805225653207e-06, "loss": 0.0898, "step": 7140, "task_loss": 0.004132535308599472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2646453082561493, "epoch": 3.4, "learning_rate": 6.422802850356296e-06, "loss": 0.0993, "step": 7150, "task_loss": 0.17641893029212952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09765015542507172, "epoch": 3.4, "learning_rate": 6.403800475059383e-06, "loss": 0.0545, "step": 7160, "task_loss": 0.0811493992805481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.042697787284851074, "epoch": 3.41, "learning_rate": 6.3847980997624705e-06, "loss": 0.0704, "step": 7170, "task_loss": 0.21445339918136597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23853538930416107, "epoch": 3.41, "learning_rate": 6.365795724465559e-06, "loss": 0.0793, "step": 7180, "task_loss": 0.15609657764434814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09077732264995575, "epoch": 3.42, "learning_rate": 6.346793349168646e-06, "loss": 0.1064, "step": 7190, "task_loss": 0.3665331304073334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021392345428466797, "epoch": 3.42, "learning_rate": 6.327790973871735e-06, "loss": 0.1384, "step": 7200, "task_loss": 0.18095025420188904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2543436586856842, "epoch": 3.43, "learning_rate": 6.308788598574822e-06, "loss": 0.1032, "step": 7210, "task_loss": 0.13427653908729553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02127661183476448, "epoch": 3.43, "learning_rate": 6.289786223277911e-06, "loss": 0.1165, "step": 7220, "task_loss": 0.004494883120059967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.029238495975732803, "epoch": 3.43, "learning_rate": 6.270783847980998e-06, "loss": 0.0929, "step": 7230, "task_loss": 0.0037034451961517334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0373212993144989, "epoch": 3.44, "learning_rate": 6.251781472684086e-06, "loss": 0.1337, "step": 7240, "task_loss": 0.005556315183639526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23062750697135925, "epoch": 3.44, "learning_rate": 6.232779097387173e-06, "loss": 0.0894, "step": 7250, "task_loss": 0.34356802701950073 }, { "epoch": 3.44, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.27475783228874207, "eval_runtime": 22.2988, "eval_samples_per_second": 39.105, "eval_steps_per_second": 4.888, "step": 7250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12996672093868256, "epoch": 3.45, "learning_rate": 6.213776722090262e-06, "loss": 0.0485, "step": 7260, "task_loss": 0.1863107681274414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19036176800727844, "epoch": 3.45, "learning_rate": 6.19477434679335e-06, "loss": 0.0722, "step": 7270, "task_loss": 0.15313661098480225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.24250520765781403, "epoch": 3.46, "learning_rate": 6.175771971496437e-06, "loss": 0.087, "step": 7280, "task_loss": 0.34011194109916687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13349878787994385, "epoch": 3.46, "learning_rate": 6.156769596199526e-06, "loss": 0.0665, "step": 7290, "task_loss": 0.2456045001745224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03384882211685181, "epoch": 3.47, "learning_rate": 6.137767220902613e-06, "loss": 0.1172, "step": 7300, "task_loss": 0.0046829357743263245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02070469781756401, "epoch": 3.47, "learning_rate": 6.1187648456057014e-06, "loss": 0.1301, "step": 7310, "task_loss": 0.005317840725183487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06323584169149399, "epoch": 3.48, "learning_rate": 6.0997624703087884e-06, "loss": 0.0884, "step": 7320, "task_loss": 0.01452043280005455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.059165108948946, "epoch": 3.48, "learning_rate": 6.080760095011877e-06, "loss": 0.0623, "step": 7330, "task_loss": 0.02391085773706436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11576695740222931, "epoch": 3.49, "learning_rate": 6.061757719714965e-06, "loss": 0.0891, "step": 7340, "task_loss": 0.19836050271987915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012924795970320702, "epoch": 3.49, "learning_rate": 6.042755344418053e-06, "loss": 0.0451, "step": 7350, "task_loss": 0.13838058710098267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.029066815972328186, "epoch": 3.5, "learning_rate": 6.023752969121141e-06, "loss": 0.1052, "step": 7360, "task_loss": 0.01864166557788849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13578568398952484, "epoch": 3.5, "learning_rate": 6.004750593824229e-06, "loss": 0.1078, "step": 7370, "task_loss": 0.08328451216220856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.23648998141288757, "epoch": 3.51, "learning_rate": 5.9857482185273165e-06, "loss": 0.0746, "step": 7380, "task_loss": 0.22922304272651672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09257731586694717, "epoch": 3.51, "learning_rate": 5.9667458432304035e-06, "loss": 0.0938, "step": 7390, "task_loss": 0.09933258593082428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17856569588184357, "epoch": 3.52, "learning_rate": 5.947743467933492e-06, "loss": 0.1055, "step": 7400, "task_loss": 0.15133805572986603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.009972669184207916, "epoch": 3.52, "learning_rate": 5.928741092636579e-06, "loss": 0.0526, "step": 7410, "task_loss": 0.003201443701982498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08634195476770401, "epoch": 3.52, "learning_rate": 5.909738717339668e-06, "loss": 0.1139, "step": 7420, "task_loss": 0.1812535524368286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013718021102249622, "epoch": 3.53, "learning_rate": 5.890736342042756e-06, "loss": 0.0834, "step": 7430, "task_loss": 0.0068436190485954285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015075819566845894, "epoch": 3.53, "learning_rate": 5.871733966745844e-06, "loss": 0.0571, "step": 7440, "task_loss": 0.13153530657291412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1445593535900116, "epoch": 3.54, "learning_rate": 5.8527315914489315e-06, "loss": 0.0546, "step": 7450, "task_loss": 0.07850364595651627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1473163664340973, "epoch": 3.54, "learning_rate": 5.83372921615202e-06, "loss": 0.1104, "step": 7460, "task_loss": 0.15125451982021332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06551199406385422, "epoch": 3.55, "learning_rate": 5.814726840855107e-06, "loss": 0.0888, "step": 7470, "task_loss": 0.034045103937387466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.21904179453849792, "epoch": 3.55, "learning_rate": 5.795724465558196e-06, "loss": 0.098, "step": 7480, "task_loss": 0.2412600815296173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.031937625259160995, "epoch": 3.56, "learning_rate": 5.776722090261283e-06, "loss": 0.0702, "step": 7490, "task_loss": 0.015268594026565552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06982429325580597, "epoch": 3.56, "learning_rate": 5.757719714964372e-06, "loss": 0.1023, "step": 7500, "task_loss": 0.3576207160949707 }, { "epoch": 3.56, "eval_accuracy": 0.9094036697247706, "eval_loss": 0.32788407802581787, "eval_runtime": 21.9451, "eval_samples_per_second": 39.736, "eval_steps_per_second": 4.967, "step": 7500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.029036687687039375, "epoch": 3.57, "learning_rate": 5.738717339667459e-06, "loss": 0.1211, "step": 7510, "task_loss": 0.014738757163286209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021804381161928177, "epoch": 3.57, "learning_rate": 5.7197149643705466e-06, "loss": 0.0494, "step": 7520, "task_loss": 0.005628753453493118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017943602055311203, "epoch": 3.58, "learning_rate": 5.700712589073634e-06, "loss": 0.0556, "step": 7530, "task_loss": 0.17601385712623596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04630360007286072, "epoch": 3.58, "learning_rate": 5.681710213776722e-06, "loss": 0.092, "step": 7540, "task_loss": 0.07351444661617279 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19516371190547943, "epoch": 3.59, "learning_rate": 5.662707838479811e-06, "loss": 0.102, "step": 7550, "task_loss": 0.11753221601247787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.075057253241539, "epoch": 3.59, "learning_rate": 5.643705463182898e-06, "loss": 0.0556, "step": 7560, "task_loss": 0.004980906844139099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04576558992266655, "epoch": 3.6, "learning_rate": 5.624703087885987e-06, "loss": 0.1244, "step": 7570, "task_loss": 0.022598903626203537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.008466158993542194, "epoch": 3.6, "learning_rate": 5.605700712589074e-06, "loss": 0.0988, "step": 7580, "task_loss": 0.0024762973189353943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02449963614344597, "epoch": 3.61, "learning_rate": 5.5866983372921624e-06, "loss": 0.1019, "step": 7590, "task_loss": 0.003134731203317642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026412509381771088, "epoch": 3.61, "learning_rate": 5.5676959619952495e-06, "loss": 0.0813, "step": 7600, "task_loss": 0.10514649748802185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09414191544055939, "epoch": 3.62, "learning_rate": 5.548693586698338e-06, "loss": 0.1004, "step": 7610, "task_loss": 0.15864884853363037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16137491166591644, "epoch": 3.62, "learning_rate": 5.529691211401426e-06, "loss": 0.1594, "step": 7620, "task_loss": 0.06678696721792221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.081720732152462, "epoch": 3.62, "learning_rate": 5.510688836104513e-06, "loss": 0.0898, "step": 7630, "task_loss": 0.13728320598602295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09084869921207428, "epoch": 3.63, "learning_rate": 5.491686460807602e-06, "loss": 0.0892, "step": 7640, "task_loss": 0.03533271327614784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.008733304217457771, "epoch": 3.63, "learning_rate": 5.472684085510689e-06, "loss": 0.0742, "step": 7650, "task_loss": 0.006262246519327164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024746078997850418, "epoch": 3.64, "learning_rate": 5.4536817102137775e-06, "loss": 0.0666, "step": 7660, "task_loss": 0.1450251340866089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.17060193419456482, "epoch": 3.64, "learning_rate": 5.4346793349168645e-06, "loss": 0.1122, "step": 7670, "task_loss": 0.4199802875518799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07124300301074982, "epoch": 3.65, "learning_rate": 5.415676959619953e-06, "loss": 0.0768, "step": 7680, "task_loss": 0.3081812262535095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04165620356798172, "epoch": 3.65, "learning_rate": 5.39667458432304e-06, "loss": 0.0542, "step": 7690, "task_loss": 0.008864354342222214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12530671060085297, "epoch": 3.66, "learning_rate": 5.377672209026129e-06, "loss": 0.065, "step": 7700, "task_loss": 0.007050979882478714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02896895259618759, "epoch": 3.66, "learning_rate": 5.358669833729217e-06, "loss": 0.0978, "step": 7710, "task_loss": 0.008473467081785202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02197858691215515, "epoch": 3.67, "learning_rate": 5.339667458432305e-06, "loss": 0.0708, "step": 7720, "task_loss": 0.13779109716415405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.14627870917320251, "epoch": 3.67, "learning_rate": 5.3206650831353925e-06, "loss": 0.0796, "step": 7730, "task_loss": 0.11915778368711472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.009426005184650421, "epoch": 3.68, "learning_rate": 5.3016627078384795e-06, "loss": 0.0713, "step": 7740, "task_loss": 0.0036111027002334595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026764120906591415, "epoch": 3.68, "learning_rate": 5.282660332541568e-06, "loss": 0.0495, "step": 7750, "task_loss": 0.17961205542087555 }, { "epoch": 3.68, "eval_accuracy": 0.9151376146788991, "eval_loss": 0.2988388240337372, "eval_runtime": 21.9907, "eval_samples_per_second": 39.653, "eval_steps_per_second": 4.957, "step": 7750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02125394716858864, "epoch": 3.69, "learning_rate": 5.263657957244655e-06, "loss": 0.0592, "step": 7760, "task_loss": 0.13330192863941193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.023334577679634094, "epoch": 3.69, "learning_rate": 5.244655581947744e-06, "loss": 0.1014, "step": 7770, "task_loss": 0.2334352731704712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07482799887657166, "epoch": 3.7, "learning_rate": 5.225653206650832e-06, "loss": 0.0642, "step": 7780, "task_loss": 0.03224996477365494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06566259264945984, "epoch": 3.7, "learning_rate": 5.20665083135392e-06, "loss": 0.1021, "step": 7790, "task_loss": 0.03293333947658539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.20237162709236145, "epoch": 3.71, "learning_rate": 5.1876484560570076e-06, "loss": 0.0681, "step": 7800, "task_loss": 0.11539559811353683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020512059330940247, "epoch": 3.71, "learning_rate": 5.168646080760095e-06, "loss": 0.1384, "step": 7810, "task_loss": 0.14106139540672302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.033618003129959106, "epoch": 3.71, "learning_rate": 5.149643705463183e-06, "loss": 0.0839, "step": 7820, "task_loss": 0.2362319827079773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01834789663553238, "epoch": 3.72, "learning_rate": 5.130641330166272e-06, "loss": 0.0992, "step": 7830, "task_loss": 0.06447672098875046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08836167305707932, "epoch": 3.72, "learning_rate": 5.111638954869359e-06, "loss": 0.0935, "step": 7840, "task_loss": 0.08710360527038574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04010258615016937, "epoch": 3.73, "learning_rate": 5.092636579572448e-06, "loss": 0.1045, "step": 7850, "task_loss": 0.14512062072753906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3869872987270355, "epoch": 3.73, "learning_rate": 5.073634204275535e-06, "loss": 0.0924, "step": 7860, "task_loss": 0.2303229570388794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19745223224163055, "epoch": 3.74, "learning_rate": 5.054631828978623e-06, "loss": 0.0985, "step": 7870, "task_loss": 0.14754478633403778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12212712317705154, "epoch": 3.74, "learning_rate": 5.0356294536817105e-06, "loss": 0.1248, "step": 7880, "task_loss": 0.09426712989807129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04913119226694107, "epoch": 3.75, "learning_rate": 5.016627078384798e-06, "loss": 0.1346, "step": 7890, "task_loss": 0.09092673659324646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12528668344020844, "epoch": 3.75, "learning_rate": 4.997624703087887e-06, "loss": 0.1229, "step": 7900, "task_loss": 0.07792092859745026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0339021235704422, "epoch": 3.76, "learning_rate": 4.978622327790975e-06, "loss": 0.0752, "step": 7910, "task_loss": 0.015460405498743057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024016963317990303, "epoch": 3.76, "learning_rate": 4.959619952494062e-06, "loss": 0.0512, "step": 7920, "task_loss": 0.006217729300260544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04985890910029411, "epoch": 3.77, "learning_rate": 4.94061757719715e-06, "loss": 0.0897, "step": 7930, "task_loss": 0.20359504222869873 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05362790822982788, "epoch": 3.77, "learning_rate": 4.921615201900238e-06, "loss": 0.0983, "step": 7940, "task_loss": 0.017056990414857864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03559091314673424, "epoch": 3.78, "learning_rate": 4.9026128266033255e-06, "loss": 0.1052, "step": 7950, "task_loss": 0.020256590098142624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012951750308275223, "epoch": 3.78, "learning_rate": 4.883610451306413e-06, "loss": 0.0661, "step": 7960, "task_loss": 0.002787817269563675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018247317522764206, "epoch": 3.79, "learning_rate": 4.864608076009501e-06, "loss": 0.0567, "step": 7970, "task_loss": 0.005940131843090057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01746300235390663, "epoch": 3.79, "learning_rate": 4.84560570071259e-06, "loss": 0.0532, "step": 7980, "task_loss": 0.14214222133159637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06765209138393402, "epoch": 3.8, "learning_rate": 4.826603325415678e-06, "loss": 0.1438, "step": 7990, "task_loss": 0.02867637202143669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08068070560693741, "epoch": 3.8, "learning_rate": 4.807600950118766e-06, "loss": 0.0899, "step": 8000, "task_loss": 0.22640740871429443 }, { "epoch": 3.8, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.2796386182308197, "eval_runtime": 22.3683, "eval_samples_per_second": 38.984, "eval_steps_per_second": 4.873, "step": 8000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.025517662987113, "epoch": 3.81, "learning_rate": 4.7885985748218535e-06, "loss": 0.0794, "step": 8010, "task_loss": 0.009171344339847565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.030752386897802353, "epoch": 3.81, "learning_rate": 4.769596199524941e-06, "loss": 0.0868, "step": 8020, "task_loss": 0.004918545484542847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024365507066249847, "epoch": 3.81, "learning_rate": 4.750593824228028e-06, "loss": 0.0529, "step": 8030, "task_loss": 0.05997316166758537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1693132221698761, "epoch": 3.82, "learning_rate": 4.731591448931116e-06, "loss": 0.1007, "step": 8040, "task_loss": 0.2038179188966751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.030011361464858055, "epoch": 3.82, "learning_rate": 4.712589073634204e-06, "loss": 0.1164, "step": 8050, "task_loss": 0.016198869794607162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03136039152741432, "epoch": 3.83, "learning_rate": 4.693586698337293e-06, "loss": 0.0714, "step": 8060, "task_loss": 0.012641217559576035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026943452656269073, "epoch": 3.83, "learning_rate": 4.674584323040381e-06, "loss": 0.0604, "step": 8070, "task_loss": 0.003778461366891861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0963515192270279, "epoch": 3.84, "learning_rate": 4.6555819477434686e-06, "loss": 0.0641, "step": 8080, "task_loss": 0.1955173909664154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06787940859794617, "epoch": 3.84, "learning_rate": 4.636579572446556e-06, "loss": 0.07, "step": 8090, "task_loss": 0.029893912374973297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.18741311132907867, "epoch": 3.85, "learning_rate": 4.617577197149644e-06, "loss": 0.0773, "step": 8100, "task_loss": 0.12163101881742477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07996465265750885, "epoch": 3.85, "learning_rate": 4.598574821852732e-06, "loss": 0.1448, "step": 8110, "task_loss": 0.04173227399587631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04726814478635788, "epoch": 3.86, "learning_rate": 4.57957244655582e-06, "loss": 0.0407, "step": 8120, "task_loss": 0.0074256956577301025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1402328610420227, "epoch": 3.86, "learning_rate": 4.560570071258908e-06, "loss": 0.0503, "step": 8130, "task_loss": 0.07385687530040741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2221362441778183, "epoch": 3.87, "learning_rate": 4.541567695961996e-06, "loss": 0.1239, "step": 8140, "task_loss": 0.16662901639938354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.027472279965877533, "epoch": 3.87, "learning_rate": 4.522565320665084e-06, "loss": 0.0452, "step": 8150, "task_loss": 0.007453102618455887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04676855728030205, "epoch": 3.88, "learning_rate": 4.5035629453681715e-06, "loss": 0.1059, "step": 8160, "task_loss": 0.15876612067222595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.025045206770300865, "epoch": 3.88, "learning_rate": 4.484560570071259e-06, "loss": 0.1289, "step": 8170, "task_loss": 0.004179120063781738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.011216237209737301, "epoch": 3.89, "learning_rate": 4.465558194774347e-06, "loss": 0.1117, "step": 8180, "task_loss": 0.0025112181901931763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013475890271365643, "epoch": 3.89, "learning_rate": 4.446555819477435e-06, "loss": 0.0861, "step": 8190, "task_loss": 0.0036102384328842163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.029119327664375305, "epoch": 3.9, "learning_rate": 4.427553444180523e-06, "loss": 0.0486, "step": 8200, "task_loss": 0.0045392923057079315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013042573817074299, "epoch": 3.9, "learning_rate": 4.408551068883611e-06, "loss": 0.0941, "step": 8210, "task_loss": 0.11527097970247269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02264486812055111, "epoch": 3.9, "learning_rate": 4.389548693586699e-06, "loss": 0.0687, "step": 8220, "task_loss": 0.04810720682144165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.007877323776483536, "epoch": 3.91, "learning_rate": 4.3705463182897865e-06, "loss": 0.0728, "step": 8230, "task_loss": 0.00500917062163353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.030022092163562775, "epoch": 3.91, "learning_rate": 4.351543942992874e-06, "loss": 0.0973, "step": 8240, "task_loss": 0.02026822790503502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.20027883350849152, "epoch": 3.92, "learning_rate": 4.332541567695962e-06, "loss": 0.1102, "step": 8250, "task_loss": 0.12834565341472626 }, { "epoch": 3.92, "eval_accuracy": 0.9162844036697247, "eval_loss": 0.26672619581222534, "eval_runtime": 22.291, "eval_samples_per_second": 39.119, "eval_steps_per_second": 4.89, "step": 8250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12070262432098389, "epoch": 3.92, "learning_rate": 4.31353919239905e-06, "loss": 0.1236, "step": 8260, "task_loss": 0.2388094961643219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.15468746423721313, "epoch": 3.93, "learning_rate": 4.294536817102138e-06, "loss": 0.1083, "step": 8270, "task_loss": 0.23200759291648865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.18423910439014435, "epoch": 3.93, "learning_rate": 4.275534441805226e-06, "loss": 0.0713, "step": 8280, "task_loss": 0.09041578322649002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021810609847307205, "epoch": 3.94, "learning_rate": 4.256532066508314e-06, "loss": 0.0409, "step": 8290, "task_loss": 0.0035596080124378204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11239130049943924, "epoch": 3.94, "learning_rate": 4.2375296912114015e-06, "loss": 0.0715, "step": 8300, "task_loss": 0.18248476088047028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08229388296604156, "epoch": 3.95, "learning_rate": 4.218527315914489e-06, "loss": 0.1268, "step": 8310, "task_loss": 0.062466811388731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020841067656874657, "epoch": 3.95, "learning_rate": 4.199524940617577e-06, "loss": 0.0958, "step": 8320, "task_loss": 0.01168619841337204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07428955286741257, "epoch": 3.96, "learning_rate": 4.180522565320665e-06, "loss": 0.1132, "step": 8330, "task_loss": 0.020049307495355606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.14601218700408936, "epoch": 3.96, "learning_rate": 4.161520190023753e-06, "loss": 0.1099, "step": 8340, "task_loss": 0.06591594219207764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014185711741447449, "epoch": 3.97, "learning_rate": 4.142517814726842e-06, "loss": 0.0869, "step": 8350, "task_loss": 0.11826062202453613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.003288624342530966, "epoch": 3.97, "learning_rate": 4.1235154394299296e-06, "loss": 0.0704, "step": 8360, "task_loss": 0.008070297539234161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0797647014260292, "epoch": 3.98, "learning_rate": 4.104513064133017e-06, "loss": 0.0894, "step": 8370, "task_loss": 0.10876837372779846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04628456011414528, "epoch": 3.98, "learning_rate": 4.0855106888361044e-06, "loss": 0.1141, "step": 8380, "task_loss": 0.2866939604282379 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012273245491087437, "epoch": 3.99, "learning_rate": 4.066508313539192e-06, "loss": 0.1052, "step": 8390, "task_loss": 0.14446358382701874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06594362109899521, "epoch": 3.99, "learning_rate": 4.04750593824228e-06, "loss": 0.052, "step": 8400, "task_loss": 0.02517566829919815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017545923590660095, "epoch": 4.0, "learning_rate": 4.028503562945368e-06, "loss": 0.0642, "step": 8410, "task_loss": 0.006249226629734039 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04165567085146904, "epoch": 4.0, "learning_rate": 4.009501187648456e-06, "loss": 0.062, "step": 8420, "task_loss": 0.008884565904736519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06637102365493774, "epoch": 4.0, "learning_rate": 3.990498812351545e-06, "loss": 0.0575, "step": 8430, "task_loss": 0.11581560969352722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03454139828681946, "epoch": 4.01, "learning_rate": 3.9714964370546325e-06, "loss": 0.0857, "step": 8440, "task_loss": 0.025920506566762924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12666280567646027, "epoch": 4.01, "learning_rate": 3.95249406175772e-06, "loss": 0.052, "step": 8450, "task_loss": 0.22069977223873138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12481103837490082, "epoch": 4.02, "learning_rate": 3.933491686460808e-06, "loss": 0.0905, "step": 8460, "task_loss": 0.34771737456321716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.027316780760884285, "epoch": 4.02, "learning_rate": 3.914489311163896e-06, "loss": 0.045, "step": 8470, "task_loss": 0.008249737322330475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.019640466198325157, "epoch": 4.03, "learning_rate": 3.895486935866984e-06, "loss": 0.0346, "step": 8480, "task_loss": 0.0024937279522418976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02247186377644539, "epoch": 4.03, "learning_rate": 3.876484560570072e-06, "loss": 0.0312, "step": 8490, "task_loss": 0.014447018504142761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.010977246798574924, "epoch": 4.04, "learning_rate": 3.857482185273159e-06, "loss": 0.061, "step": 8500, "task_loss": 0.03449002653360367 }, { "epoch": 4.04, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.283713161945343, "eval_runtime": 22.1383, "eval_samples_per_second": 39.389, "eval_steps_per_second": 4.924, "step": 8500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03942285478115082, "epoch": 4.04, "learning_rate": 3.8384798099762475e-06, "loss": 0.0416, "step": 8510, "task_loss": 0.005950760096311569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014301864430308342, "epoch": 4.05, "learning_rate": 3.819477434679335e-06, "loss": 0.069, "step": 8520, "task_loss": 0.0029358714818954468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.039006754755973816, "epoch": 4.05, "learning_rate": 3.800475059382423e-06, "loss": 0.0629, "step": 8530, "task_loss": 0.19067448377609253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07345309853553772, "epoch": 4.06, "learning_rate": 3.781472684085511e-06, "loss": 0.0515, "step": 8540, "task_loss": 0.18440312147140503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0816812664270401, "epoch": 4.06, "learning_rate": 3.762470308788599e-06, "loss": 0.0785, "step": 8550, "task_loss": 0.1739380657672882 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07522659748792648, "epoch": 4.07, "learning_rate": 3.743467933491687e-06, "loss": 0.089, "step": 8560, "task_loss": 0.027755912393331528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2582024037837982, "epoch": 4.07, "learning_rate": 3.7244655581947747e-06, "loss": 0.1013, "step": 8570, "task_loss": 0.27483993768692017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.011575591750442982, "epoch": 4.08, "learning_rate": 3.7054631828978625e-06, "loss": 0.051, "step": 8580, "task_loss": 0.004382755607366562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.043528296053409576, "epoch": 4.08, "learning_rate": 3.6864608076009504e-06, "loss": 0.0395, "step": 8590, "task_loss": 0.01687121018767357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06470053642988205, "epoch": 4.09, "learning_rate": 3.6674584323040387e-06, "loss": 0.0767, "step": 8600, "task_loss": 0.12226305902004242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.16258491575717926, "epoch": 4.09, "learning_rate": 3.6484560570071265e-06, "loss": 0.0875, "step": 8610, "task_loss": 0.12856589257717133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012949327006936073, "epoch": 4.1, "learning_rate": 3.629453681710214e-06, "loss": 0.0215, "step": 8620, "task_loss": 0.006088566035032272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.22730335593223572, "epoch": 4.1, "learning_rate": 3.610451306413302e-06, "loss": 0.0768, "step": 8630, "task_loss": 0.3216843008995056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02113342471420765, "epoch": 4.1, "learning_rate": 3.5914489311163897e-06, "loss": 0.0908, "step": 8640, "task_loss": 0.3219309449195862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.3414075970649719, "epoch": 4.11, "learning_rate": 3.5724465558194776e-06, "loss": 0.1158, "step": 8650, "task_loss": 0.39104947447776794 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017360147088766098, "epoch": 4.11, "learning_rate": 3.5534441805225654e-06, "loss": 0.1378, "step": 8660, "task_loss": 0.005871061235666275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03779000788927078, "epoch": 4.12, "learning_rate": 3.5344418052256533e-06, "loss": 0.0691, "step": 8670, "task_loss": 0.0107310451567173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2806292772293091, "epoch": 4.12, "learning_rate": 3.5154394299287416e-06, "loss": 0.1005, "step": 8680, "task_loss": 0.3259883522987366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06188252195715904, "epoch": 4.13, "learning_rate": 3.4964370546318295e-06, "loss": 0.0653, "step": 8690, "task_loss": 0.12273290753364563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09528631716966629, "epoch": 4.13, "learning_rate": 3.4774346793349173e-06, "loss": 0.065, "step": 8700, "task_loss": 0.18623818457126617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.041278135031461716, "epoch": 4.14, "learning_rate": 3.458432304038005e-06, "loss": 0.0581, "step": 8710, "task_loss": 0.02173343300819397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012133522890508175, "epoch": 4.14, "learning_rate": 3.439429928741093e-06, "loss": 0.0752, "step": 8720, "task_loss": 0.3635708689689636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02899017184972763, "epoch": 4.15, "learning_rate": 3.4204275534441805e-06, "loss": 0.0563, "step": 8730, "task_loss": 0.11488357186317444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.18777720630168915, "epoch": 4.15, "learning_rate": 3.4014251781472683e-06, "loss": 0.0686, "step": 8740, "task_loss": 0.11035171151161194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1590900421142578, "epoch": 4.16, "learning_rate": 3.382422802850356e-06, "loss": 0.0594, "step": 8750, "task_loss": 0.05561506748199463 }, { "epoch": 4.16, "eval_accuracy": 0.9151376146788991, "eval_loss": 0.2766323983669281, "eval_runtime": 22.3282, "eval_samples_per_second": 39.054, "eval_steps_per_second": 4.882, "step": 8750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06593011319637299, "epoch": 4.16, "learning_rate": 3.3634204275534445e-06, "loss": 0.0356, "step": 8760, "task_loss": 0.0075419507920742035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.019471261650323868, "epoch": 4.17, "learning_rate": 3.3444180522565324e-06, "loss": 0.0346, "step": 8770, "task_loss": 0.004301343113183975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13416732847690582, "epoch": 4.17, "learning_rate": 3.3254156769596202e-06, "loss": 0.0982, "step": 8780, "task_loss": 0.23774561285972595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.006739257834851742, "epoch": 4.18, "learning_rate": 3.306413301662708e-06, "loss": 0.0593, "step": 8790, "task_loss": 0.0027306489646434784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04961520433425903, "epoch": 4.18, "learning_rate": 3.287410926365796e-06, "loss": 0.0887, "step": 8800, "task_loss": 0.040518369525671005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017031384631991386, "epoch": 4.19, "learning_rate": 3.268408551068884e-06, "loss": 0.0779, "step": 8810, "task_loss": 0.006137076765298843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.010000055655837059, "epoch": 4.19, "learning_rate": 3.249406175771972e-06, "loss": 0.0801, "step": 8820, "task_loss": 0.006373908370733261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08995574712753296, "epoch": 4.19, "learning_rate": 3.23040380047506e-06, "loss": 0.0486, "step": 8830, "task_loss": 0.13840673863887787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018883461132645607, "epoch": 4.2, "learning_rate": 3.211401425178148e-06, "loss": 0.0651, "step": 8840, "task_loss": 0.002827569842338562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.038998525589704514, "epoch": 4.2, "learning_rate": 3.1923990498812353e-06, "loss": 0.0606, "step": 8850, "task_loss": 0.014568600803613663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.026058971881866455, "epoch": 4.21, "learning_rate": 3.173396674584323e-06, "loss": 0.0357, "step": 8860, "task_loss": 0.029363825917243958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024625074118375778, "epoch": 4.21, "learning_rate": 3.154394299287411e-06, "loss": 0.0547, "step": 8870, "task_loss": 0.004386581480503082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020818475633859634, "epoch": 4.22, "learning_rate": 3.135391923990499e-06, "loss": 0.0566, "step": 8880, "task_loss": 0.0970916673541069 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015941135585308075, "epoch": 4.22, "learning_rate": 3.1163895486935867e-06, "loss": 0.0339, "step": 8890, "task_loss": 0.003059886395931244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020430579781532288, "epoch": 4.23, "learning_rate": 3.097387173396675e-06, "loss": 0.035, "step": 8900, "task_loss": 0.0052056461572647095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.15293776988983154, "epoch": 4.23, "learning_rate": 3.078384798099763e-06, "loss": 0.0453, "step": 8910, "task_loss": 0.09499054402112961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.15551890432834625, "epoch": 4.24, "learning_rate": 3.0593824228028507e-06, "loss": 0.0701, "step": 8920, "task_loss": 0.10638581216335297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11212066560983658, "epoch": 4.24, "learning_rate": 3.0403800475059386e-06, "loss": 0.0544, "step": 8930, "task_loss": 0.19491392374038696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016818024218082428, "epoch": 4.25, "learning_rate": 3.0213776722090264e-06, "loss": 0.0488, "step": 8940, "task_loss": 0.2066863775253296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0049272989854216576, "epoch": 4.25, "learning_rate": 3.0023752969121143e-06, "loss": 0.0446, "step": 8950, "task_loss": 0.007535018026828766 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03029637783765793, "epoch": 4.26, "learning_rate": 2.9833729216152017e-06, "loss": 0.0593, "step": 8960, "task_loss": 0.3232007324695587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07497206330299377, "epoch": 4.26, "learning_rate": 2.9643705463182896e-06, "loss": 0.087, "step": 8970, "task_loss": 0.18494026362895966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04392876848578453, "epoch": 4.27, "learning_rate": 2.945368171021378e-06, "loss": 0.0731, "step": 8980, "task_loss": 0.1489235907793045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01495037879794836, "epoch": 4.27, "learning_rate": 2.9263657957244658e-06, "loss": 0.0548, "step": 8990, "task_loss": 0.0053473226726055145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08316938579082489, "epoch": 4.28, "learning_rate": 2.9073634204275536e-06, "loss": 0.1062, "step": 9000, "task_loss": 0.029360707849264145 }, { "epoch": 4.28, "eval_accuracy": 0.9139908256880734, "eval_loss": 0.2777394950389862, "eval_runtime": 22.1404, "eval_samples_per_second": 39.385, "eval_steps_per_second": 4.923, "step": 9000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.023107042536139488, "epoch": 4.28, "learning_rate": 2.8883610451306415e-06, "loss": 0.0757, "step": 9010, "task_loss": 0.00649937242269516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.030949320644140244, "epoch": 4.29, "learning_rate": 2.8693586698337293e-06, "loss": 0.0622, "step": 9020, "task_loss": 0.017877578735351562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02196587808430195, "epoch": 4.29, "learning_rate": 2.850356294536817e-06, "loss": 0.0572, "step": 9030, "task_loss": 0.014159291982650757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05502418801188469, "epoch": 4.29, "learning_rate": 2.8313539192399055e-06, "loss": 0.0318, "step": 9040, "task_loss": 0.20975813269615173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013882439583539963, "epoch": 4.3, "learning_rate": 2.8123515439429934e-06, "loss": 0.0576, "step": 9050, "task_loss": 0.09990142285823822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.010699973441660404, "epoch": 4.3, "learning_rate": 2.7933491686460812e-06, "loss": 0.0913, "step": 9060, "task_loss": 0.0033752471208572388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01082450058311224, "epoch": 4.31, "learning_rate": 2.774346793349169e-06, "loss": 0.0409, "step": 9070, "task_loss": 0.004309527575969696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016541698947548866, "epoch": 4.31, "learning_rate": 2.7553444180522565e-06, "loss": 0.0631, "step": 9080, "task_loss": 0.002968382090330124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.006014951970428228, "epoch": 4.32, "learning_rate": 2.7363420427553444e-06, "loss": 0.0573, "step": 9090, "task_loss": 0.1005413755774498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.22611913084983826, "epoch": 4.32, "learning_rate": 2.7173396674584322e-06, "loss": 0.1065, "step": 9100, "task_loss": 0.13456928730010986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03514198213815689, "epoch": 4.33, "learning_rate": 2.69833729216152e-06, "loss": 0.0453, "step": 9110, "task_loss": 0.0037595927715301514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016409728676080704, "epoch": 4.33, "learning_rate": 2.6793349168646084e-06, "loss": 0.0601, "step": 9120, "task_loss": 0.005627848207950592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.016438759863376617, "epoch": 4.34, "learning_rate": 2.6603325415676963e-06, "loss": 0.088, "step": 9130, "task_loss": 0.004075001925230026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.048099249601364136, "epoch": 4.34, "learning_rate": 2.641330166270784e-06, "loss": 0.0575, "step": 9140, "task_loss": 0.021824389696121216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03359050303697586, "epoch": 4.35, "learning_rate": 2.622327790973872e-06, "loss": 0.0541, "step": 9150, "task_loss": 0.010654143989086151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.35756510496139526, "epoch": 4.35, "learning_rate": 2.60332541567696e-06, "loss": 0.0756, "step": 9160, "task_loss": 0.1754165142774582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015034861862659454, "epoch": 4.36, "learning_rate": 2.5843230403800477e-06, "loss": 0.1075, "step": 9170, "task_loss": 0.008607205003499985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.26236093044281006, "epoch": 4.36, "learning_rate": 2.565320665083136e-06, "loss": 0.121, "step": 9180, "task_loss": 0.14051243662834167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024604659527540207, "epoch": 4.37, "learning_rate": 2.546318289786224e-06, "loss": 0.0556, "step": 9190, "task_loss": 0.12805365025997162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.041992854326963425, "epoch": 4.37, "learning_rate": 2.5273159144893113e-06, "loss": 0.1061, "step": 9200, "task_loss": 0.007956545799970627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.15274813771247864, "epoch": 4.38, "learning_rate": 2.508313539192399e-06, "loss": 0.0563, "step": 9210, "task_loss": 0.06382400542497635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1668039709329605, "epoch": 4.38, "learning_rate": 2.4893111638954874e-06, "loss": 0.0911, "step": 9220, "task_loss": 0.32040756940841675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08066344261169434, "epoch": 4.38, "learning_rate": 2.470308788598575e-06, "loss": 0.0624, "step": 9230, "task_loss": 0.06633038818836212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021942077204585075, "epoch": 4.39, "learning_rate": 2.4513064133016627e-06, "loss": 0.0486, "step": 9240, "task_loss": 0.00539054349064827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021917153149843216, "epoch": 4.39, "learning_rate": 2.4323040380047506e-06, "loss": 0.0751, "step": 9250, "task_loss": 0.03953142464160919 }, { "epoch": 4.39, "eval_accuracy": 0.9220183486238532, "eval_loss": 0.2689874768257141, "eval_runtime": 22.1397, "eval_samples_per_second": 39.386, "eval_steps_per_second": 4.923, "step": 9250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013196494430303574, "epoch": 4.4, "learning_rate": 2.413301662707839e-06, "loss": 0.0473, "step": 9260, "task_loss": 0.004326473921537399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04993726685643196, "epoch": 4.4, "learning_rate": 2.3942992874109268e-06, "loss": 0.0502, "step": 9270, "task_loss": 0.03252362832427025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06098417192697525, "epoch": 4.41, "learning_rate": 2.375296912114014e-06, "loss": 0.0519, "step": 9280, "task_loss": 0.04169990494847298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.030414171516895294, "epoch": 4.41, "learning_rate": 2.356294536817102e-06, "loss": 0.0838, "step": 9290, "task_loss": 0.008283041417598724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04094374552369118, "epoch": 4.42, "learning_rate": 2.3372921615201903e-06, "loss": 0.0802, "step": 9300, "task_loss": 0.01785259321331978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.022956877946853638, "epoch": 4.42, "learning_rate": 2.318289786223278e-06, "loss": 0.0409, "step": 9310, "task_loss": 0.1291673481464386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.42349928617477417, "epoch": 4.43, "learning_rate": 2.299287410926366e-06, "loss": 0.1074, "step": 9320, "task_loss": 0.4811912178993225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021385155618190765, "epoch": 4.43, "learning_rate": 2.280285035629454e-06, "loss": 0.0496, "step": 9330, "task_loss": 0.006091751158237457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013296818360686302, "epoch": 4.44, "learning_rate": 2.261282660332542e-06, "loss": 0.0448, "step": 9340, "task_loss": 0.007342435419559479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.0073758745566010475, "epoch": 4.44, "learning_rate": 2.2422802850356297e-06, "loss": 0.074, "step": 9350, "task_loss": 0.0043178461492061615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.017247337847948074, "epoch": 4.45, "learning_rate": 2.2232779097387175e-06, "loss": 0.0686, "step": 9360, "task_loss": 0.002873547375202179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13869813084602356, "epoch": 4.45, "learning_rate": 2.2042755344418054e-06, "loss": 0.0623, "step": 9370, "task_loss": 0.12998461723327637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.037029024213552475, "epoch": 4.46, "learning_rate": 2.1852731591448932e-06, "loss": 0.0671, "step": 9380, "task_loss": 0.21687030792236328 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01134589221328497, "epoch": 4.46, "learning_rate": 2.166270783847981e-06, "loss": 0.057, "step": 9390, "task_loss": 0.19388972222805023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06970459967851639, "epoch": 4.47, "learning_rate": 2.147268408551069e-06, "loss": 0.0584, "step": 9400, "task_loss": 0.02260136976838112 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.009460528381168842, "epoch": 4.47, "learning_rate": 2.128266033254157e-06, "loss": 0.045, "step": 9410, "task_loss": 0.12310618162155151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11489342153072357, "epoch": 4.48, "learning_rate": 2.1092636579572447e-06, "loss": 0.0371, "step": 9420, "task_loss": 0.08696582913398743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07559200376272202, "epoch": 4.48, "learning_rate": 2.0902612826603326e-06, "loss": 0.0618, "step": 9430, "task_loss": 0.3284105062484741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07968532294034958, "epoch": 4.48, "learning_rate": 2.071258907363421e-06, "loss": 0.1117, "step": 9440, "task_loss": 0.07459443807601929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.28972363471984863, "epoch": 4.49, "learning_rate": 2.0522565320665087e-06, "loss": 0.0473, "step": 9450, "task_loss": 0.16974028944969177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02968553639948368, "epoch": 4.49, "learning_rate": 2.033254156769596e-06, "loss": 0.0684, "step": 9460, "task_loss": 0.2634113132953644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.030829662457108498, "epoch": 4.5, "learning_rate": 2.014251781472684e-06, "loss": 0.0682, "step": 9470, "task_loss": 0.03290770202875137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03441692143678665, "epoch": 4.5, "learning_rate": 1.9952494061757723e-06, "loss": 0.108, "step": 9480, "task_loss": 0.011277075856924057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08928455412387848, "epoch": 4.51, "learning_rate": 1.97624703087886e-06, "loss": 0.0582, "step": 9490, "task_loss": 0.07190164923667908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01250946894288063, "epoch": 4.51, "learning_rate": 1.957244655581948e-06, "loss": 0.0386, "step": 9500, "task_loss": 0.009386200457811356 }, { "epoch": 4.51, "eval_accuracy": 0.9162844036697247, "eval_loss": 0.2667511999607086, "eval_runtime": 22.1403, "eval_samples_per_second": 39.385, "eval_steps_per_second": 4.923, "step": 9500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020871058106422424, "epoch": 4.52, "learning_rate": 1.938242280285036e-06, "loss": 0.0491, "step": 9510, "task_loss": 0.005145017057657242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03983244299888611, "epoch": 4.52, "learning_rate": 1.9192399049881237e-06, "loss": 0.05, "step": 9520, "task_loss": 0.24316395819187164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10421483218669891, "epoch": 4.53, "learning_rate": 1.9002375296912114e-06, "loss": 0.0836, "step": 9530, "task_loss": 0.06570681929588318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.043953992426395416, "epoch": 4.53, "learning_rate": 1.8812351543942995e-06, "loss": 0.0585, "step": 9540, "task_loss": 0.008459363132715225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.058616213500499725, "epoch": 4.54, "learning_rate": 1.8622327790973873e-06, "loss": 0.0522, "step": 9550, "task_loss": 0.26781898736953735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2032776176929474, "epoch": 4.54, "learning_rate": 1.8432304038004752e-06, "loss": 0.0885, "step": 9560, "task_loss": 0.3326171636581421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.041910551488399506, "epoch": 4.55, "learning_rate": 1.8242280285035633e-06, "loss": 0.0669, "step": 9570, "task_loss": 0.004698578268289566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02482573315501213, "epoch": 4.55, "learning_rate": 1.805225653206651e-06, "loss": 0.0582, "step": 9580, "task_loss": 0.005337722599506378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2271367311477661, "epoch": 4.56, "learning_rate": 1.7862232779097388e-06, "loss": 0.0514, "step": 9590, "task_loss": 0.1770418882369995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10083962976932526, "epoch": 4.56, "learning_rate": 1.7672209026128267e-06, "loss": 0.0755, "step": 9600, "task_loss": 0.20249593257904053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03125529736280441, "epoch": 4.57, "learning_rate": 1.7482185273159147e-06, "loss": 0.0481, "step": 9610, "task_loss": 0.012315116822719574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03914954140782356, "epoch": 4.57, "learning_rate": 1.7292161520190026e-06, "loss": 0.0612, "step": 9620, "task_loss": 0.1674419790506363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01954154670238495, "epoch": 4.57, "learning_rate": 1.7102137767220902e-06, "loss": 0.0653, "step": 9630, "task_loss": 0.06889810413122177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05788855999708176, "epoch": 4.58, "learning_rate": 1.691211401425178e-06, "loss": 0.0253, "step": 9640, "task_loss": 0.14915083348751068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01645825058221817, "epoch": 4.58, "learning_rate": 1.6722090261282662e-06, "loss": 0.0519, "step": 9650, "task_loss": 0.12134365737438202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04989754408597946, "epoch": 4.59, "learning_rate": 1.653206650831354e-06, "loss": 0.1004, "step": 9660, "task_loss": 0.1625884622335434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.019783230498433113, "epoch": 4.59, "learning_rate": 1.634204275534442e-06, "loss": 0.0567, "step": 9670, "task_loss": 0.004505373537540436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.19092217087745667, "epoch": 4.6, "learning_rate": 1.61520190023753e-06, "loss": 0.0458, "step": 9680, "task_loss": 0.2721264362335205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10719658434391022, "epoch": 4.6, "learning_rate": 1.5961995249406176e-06, "loss": 0.0513, "step": 9690, "task_loss": 0.03760954737663269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05233538895845413, "epoch": 4.61, "learning_rate": 1.5771971496437055e-06, "loss": 0.0688, "step": 9700, "task_loss": 0.12094153463840485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014219870790839195, "epoch": 4.61, "learning_rate": 1.5581947743467934e-06, "loss": 0.0581, "step": 9710, "task_loss": 0.0061318278312683105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012800279073417187, "epoch": 4.62, "learning_rate": 1.5391923990498814e-06, "loss": 0.0268, "step": 9720, "task_loss": 0.0036646276712417603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12216310203075409, "epoch": 4.62, "learning_rate": 1.5201900237529693e-06, "loss": 0.1461, "step": 9730, "task_loss": 0.13348951935768127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07808056473731995, "epoch": 4.63, "learning_rate": 1.5011876484560572e-06, "loss": 0.0822, "step": 9740, "task_loss": 0.22806300222873688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.010630585253238678, "epoch": 4.63, "learning_rate": 1.4821852731591448e-06, "loss": 0.0284, "step": 9750, "task_loss": 0.002479270100593567 }, { "epoch": 4.63, "eval_accuracy": 0.9185779816513762, "eval_loss": 0.2812165319919586, "eval_runtime": 22.1715, "eval_samples_per_second": 39.33, "eval_steps_per_second": 4.916, "step": 9750 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.022376172244548798, "epoch": 4.64, "learning_rate": 1.4631828978622329e-06, "loss": 0.0637, "step": 9760, "task_loss": 0.0058107636868953705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.009340550750494003, "epoch": 4.64, "learning_rate": 1.4441805225653207e-06, "loss": 0.0344, "step": 9770, "task_loss": 0.0033394992351531982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.010714657604694366, "epoch": 4.65, "learning_rate": 1.4251781472684086e-06, "loss": 0.0541, "step": 9780, "task_loss": 0.004770912230014801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024335071444511414, "epoch": 4.65, "learning_rate": 1.4061757719714967e-06, "loss": 0.0614, "step": 9790, "task_loss": 0.006472300738096237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07776258885860443, "epoch": 4.66, "learning_rate": 1.3871733966745845e-06, "loss": 0.075, "step": 9800, "task_loss": 0.039640650153160095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04833105206489563, "epoch": 4.66, "learning_rate": 1.3681710213776722e-06, "loss": 0.0351, "step": 9810, "task_loss": 0.0052606649696826935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06957106292247772, "epoch": 4.67, "learning_rate": 1.34916864608076e-06, "loss": 0.0566, "step": 9820, "task_loss": 0.012664098292589188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.018420187756419182, "epoch": 4.67, "learning_rate": 1.3301662707838481e-06, "loss": 0.0743, "step": 9830, "task_loss": 0.012570954859256744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.022069044411182404, "epoch": 4.67, "learning_rate": 1.311163895486936e-06, "loss": 0.1181, "step": 9840, "task_loss": 0.2193067967891693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.10804280638694763, "epoch": 4.68, "learning_rate": 1.2921615201900239e-06, "loss": 0.0684, "step": 9850, "task_loss": 0.1666574627161026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021793734282255173, "epoch": 4.68, "learning_rate": 1.273159144893112e-06, "loss": 0.0713, "step": 9860, "task_loss": 0.1198580265045166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013394506648182869, "epoch": 4.69, "learning_rate": 1.2541567695961996e-06, "loss": 0.077, "step": 9870, "task_loss": 0.003841262310743332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.024098927155137062, "epoch": 4.69, "learning_rate": 1.2351543942992874e-06, "loss": 0.0668, "step": 9880, "task_loss": 0.11605469882488251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012392224743962288, "epoch": 4.7, "learning_rate": 1.2161520190023753e-06, "loss": 0.0472, "step": 9890, "task_loss": 0.003138858824968338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05335499346256256, "epoch": 4.7, "learning_rate": 1.1971496437054634e-06, "loss": 0.0593, "step": 9900, "task_loss": 0.18697652220726013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.028973210602998734, "epoch": 4.71, "learning_rate": 1.178147268408551e-06, "loss": 0.0279, "step": 9910, "task_loss": 0.11546307057142258 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09816677868366241, "epoch": 4.71, "learning_rate": 1.159144893111639e-06, "loss": 0.0397, "step": 9920, "task_loss": 0.0736415684223175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.011303352192044258, "epoch": 4.72, "learning_rate": 1.140142517814727e-06, "loss": 0.0569, "step": 9930, "task_loss": 0.004318550229072571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01750335283577442, "epoch": 4.72, "learning_rate": 1.1211401425178148e-06, "loss": 0.0446, "step": 9940, "task_loss": 0.19570045173168182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.046896446496248245, "epoch": 4.73, "learning_rate": 1.1021377672209027e-06, "loss": 0.0291, "step": 9950, "task_loss": 0.07542459666728973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.2460804581642151, "epoch": 4.73, "learning_rate": 1.0831353919239906e-06, "loss": 0.061, "step": 9960, "task_loss": 0.16217932105064392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012438797391951084, "epoch": 4.74, "learning_rate": 1.0641330166270784e-06, "loss": 0.0274, "step": 9970, "task_loss": 0.0034538879990577698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012422207742929459, "epoch": 4.74, "learning_rate": 1.0451306413301663e-06, "loss": 0.0365, "step": 9980, "task_loss": 0.003980562090873718 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.09234738349914551, "epoch": 4.75, "learning_rate": 1.0261282660332544e-06, "loss": 0.0777, "step": 9990, "task_loss": 0.07327043265104294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12707777321338654, "epoch": 4.75, "learning_rate": 1.007125890736342e-06, "loss": 0.1016, "step": 10000, "task_loss": 0.08033004403114319 }, { "epoch": 4.75, "eval_accuracy": 0.9162844036697247, "eval_loss": 0.2825167179107666, "eval_runtime": 22.1651, "eval_samples_per_second": 39.341, "eval_steps_per_second": 4.918, "step": 10000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07169970124959946, "epoch": 4.76, "learning_rate": 9.8812351543943e-07, "loss": 0.0671, "step": 10010, "task_loss": 0.06848999857902527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.013409988954663277, "epoch": 4.76, "learning_rate": 9.69121140142518e-07, "loss": 0.0377, "step": 10020, "task_loss": 0.09568732976913452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07166978716850281, "epoch": 4.76, "learning_rate": 9.501187648456057e-07, "loss": 0.0294, "step": 10030, "task_loss": 0.0640857145190239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.04987531155347824, "epoch": 4.77, "learning_rate": 9.311163895486937e-07, "loss": 0.0656, "step": 10040, "task_loss": 0.021407999098300934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.007782880216836929, "epoch": 4.77, "learning_rate": 9.121140142517816e-07, "loss": 0.0498, "step": 10050, "task_loss": 0.002929363399744034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021184280514717102, "epoch": 4.78, "learning_rate": 8.931116389548694e-07, "loss": 0.0508, "step": 10060, "task_loss": 0.004369787871837616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.012340977787971497, "epoch": 4.78, "learning_rate": 8.741092636579574e-07, "loss": 0.0516, "step": 10070, "task_loss": 0.006207786500453949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1317601352930069, "epoch": 4.79, "learning_rate": 8.551068883610451e-07, "loss": 0.0475, "step": 10080, "task_loss": 0.004766244441270828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.023309551179409027, "epoch": 4.79, "learning_rate": 8.361045130641331e-07, "loss": 0.0457, "step": 10090, "task_loss": 0.008698023855686188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.046007562428712845, "epoch": 4.8, "learning_rate": 8.17102137767221e-07, "loss": 0.1062, "step": 10100, "task_loss": 0.05794458091259003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01069045439362526, "epoch": 4.8, "learning_rate": 7.980997624703088e-07, "loss": 0.0411, "step": 10110, "task_loss": 0.002656955271959305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.021371502429246902, "epoch": 4.81, "learning_rate": 7.790973871733967e-07, "loss": 0.0504, "step": 10120, "task_loss": 0.005056999623775482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.060870636254549026, "epoch": 4.81, "learning_rate": 7.600950118764846e-07, "loss": 0.0759, "step": 10130, "task_loss": 0.097642682492733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.019857440143823624, "epoch": 4.82, "learning_rate": 7.410926365795724e-07, "loss": 0.0455, "step": 10140, "task_loss": 0.005811773240566254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.05780591070652008, "epoch": 4.82, "learning_rate": 7.220902612826604e-07, "loss": 0.0547, "step": 10150, "task_loss": 0.032336119562387466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.043222397565841675, "epoch": 4.83, "learning_rate": 7.030878859857483e-07, "loss": 0.0618, "step": 10160, "task_loss": 0.002787157893180847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01754024438560009, "epoch": 4.83, "learning_rate": 6.840855106888361e-07, "loss": 0.0529, "step": 10170, "task_loss": 0.0054728202521800995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.007267419248819351, "epoch": 4.84, "learning_rate": 6.650831353919241e-07, "loss": 0.0394, "step": 10180, "task_loss": 0.004029855132102966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.060019608587026596, "epoch": 4.84, "learning_rate": 6.460807600950119e-07, "loss": 0.0454, "step": 10190, "task_loss": 0.005614615976810455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06279192864894867, "epoch": 4.85, "learning_rate": 6.270783847980998e-07, "loss": 0.0804, "step": 10200, "task_loss": 0.22416365146636963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01789834164083004, "epoch": 4.85, "learning_rate": 6.080760095011877e-07, "loss": 0.0704, "step": 10210, "task_loss": 0.0029133372008800507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.023510048165917397, "epoch": 4.86, "learning_rate": 5.890736342042755e-07, "loss": 0.0306, "step": 10220, "task_loss": 0.008717145770788193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06903427839279175, "epoch": 4.86, "learning_rate": 5.700712589073635e-07, "loss": 0.0636, "step": 10230, "task_loss": 0.02681458741426468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.11212094873189926, "epoch": 4.86, "learning_rate": 5.510688836104513e-07, "loss": 0.0766, "step": 10240, "task_loss": 0.05283607542514801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.020517665892839432, "epoch": 4.87, "learning_rate": 5.320665083135392e-07, "loss": 0.0507, "step": 10250, "task_loss": 0.006865642964839935 }, { "epoch": 4.87, "eval_accuracy": 0.9139908256880734, "eval_loss": 0.280513197183609, "eval_runtime": 22.1726, "eval_samples_per_second": 39.328, "eval_steps_per_second": 4.916, "step": 10250 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.06565746665000916, "epoch": 4.87, "learning_rate": 5.130641330166272e-07, "loss": 0.084, "step": 10260, "task_loss": 0.08022348582744598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.041272662580013275, "epoch": 4.88, "learning_rate": 4.94061757719715e-07, "loss": 0.0807, "step": 10270, "task_loss": 0.044755056500434875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02130916342139244, "epoch": 4.88, "learning_rate": 4.7505938242280285e-07, "loss": 0.0456, "step": 10280, "task_loss": 0.005585514008998871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.008105727843940258, "epoch": 4.89, "learning_rate": 4.560570071258908e-07, "loss": 0.0662, "step": 10290, "task_loss": 0.007067546248435974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02983175590634346, "epoch": 4.89, "learning_rate": 4.370546318289787e-07, "loss": 0.0701, "step": 10300, "task_loss": 0.22576290369033813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.009511064738035202, "epoch": 4.9, "learning_rate": 4.1805225653206654e-07, "loss": 0.085, "step": 10310, "task_loss": 0.007220160216093063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.037107907235622406, "epoch": 4.9, "learning_rate": 3.990498812351544e-07, "loss": 0.0351, "step": 10320, "task_loss": 0.1071058064699173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.007235179655253887, "epoch": 4.91, "learning_rate": 3.800475059382423e-07, "loss": 0.0385, "step": 10330, "task_loss": 0.002338908612728119 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014286589808762074, "epoch": 4.91, "learning_rate": 3.610451306413302e-07, "loss": 0.0366, "step": 10340, "task_loss": 0.0025917217135429382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.02622096985578537, "epoch": 4.92, "learning_rate": 3.4204275534441805e-07, "loss": 0.0292, "step": 10350, "task_loss": 0.0064817629754543304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.011151728220283985, "epoch": 4.92, "learning_rate": 3.2304038004750596e-07, "loss": 0.0683, "step": 10360, "task_loss": 0.003448858857154846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015970010310411453, "epoch": 4.93, "learning_rate": 3.040380047505938e-07, "loss": 0.0584, "step": 10370, "task_loss": 0.005513232201337814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.1370335817337036, "epoch": 4.93, "learning_rate": 2.8503562945368174e-07, "loss": 0.0771, "step": 10380, "task_loss": 0.008187536150217056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.022085029631853104, "epoch": 4.94, "learning_rate": 2.660332541567696e-07, "loss": 0.0691, "step": 10390, "task_loss": 0.009103760123252869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.029131721705198288, "epoch": 4.94, "learning_rate": 2.470308788598575e-07, "loss": 0.0614, "step": 10400, "task_loss": 0.008749555796384811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01142764464020729, "epoch": 4.95, "learning_rate": 2.280285035629454e-07, "loss": 0.0506, "step": 10410, "task_loss": 0.003860827535390854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.22063679993152618, "epoch": 4.95, "learning_rate": 2.0902612826603327e-07, "loss": 0.1186, "step": 10420, "task_loss": 0.411041259765625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.00713141867890954, "epoch": 4.95, "learning_rate": 1.9002375296912116e-07, "loss": 0.0519, "step": 10430, "task_loss": 0.0033237673342227936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.08448931574821472, "epoch": 4.96, "learning_rate": 1.7102137767220902e-07, "loss": 0.053, "step": 10440, "task_loss": 0.19629473984241486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.014177798293530941, "epoch": 4.96, "learning_rate": 1.520190023752969e-07, "loss": 0.0672, "step": 10450, "task_loss": 0.009178481996059418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13920198380947113, "epoch": 4.97, "learning_rate": 1.330166270783848e-07, "loss": 0.0836, "step": 10460, "task_loss": 0.05482466146349907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.01523562241345644, "epoch": 4.97, "learning_rate": 1.140142517814727e-07, "loss": 0.0335, "step": 10470, "task_loss": 0.0879075825214386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.015354710631072521, "epoch": 4.98, "learning_rate": 9.501187648456058e-08, "loss": 0.0834, "step": 10480, "task_loss": 0.12236123532056808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.12151747941970825, "epoch": 4.98, "learning_rate": 7.600950118764846e-08, "loss": 0.0548, "step": 10490, "task_loss": 0.1053650826215744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.07341088354587555, "epoch": 4.99, "learning_rate": 5.700712589073635e-08, "loss": 0.0709, "step": 10500, "task_loss": 0.12648561596870422 }, { "epoch": 4.99, "eval_accuracy": 0.9139908256880734, "eval_loss": 0.2854968011379242, "eval_runtime": 21.9579, "eval_samples_per_second": 39.712, "eval_steps_per_second": 4.964, "step": 10500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.13635969161987305, "epoch": 4.99, "learning_rate": 3.800475059382423e-08, "loss": 0.0549, "step": 10510, "task_loss": 0.1342453956604004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.4047220222636254, "compression/movement_sparsity/model_sparsity": 0.3142793903220987, "compression_loss": 0.0, "distillation_loss": 0.03788226097822189, "epoch": 5.0, "learning_rate": 1.9002375296912114e-08, "loss": 0.0381, "step": 10520, "task_loss": 0.05685931071639061 }, { "epoch": 5.0, "step": 10525, "total_flos": 2.220815486243328e+16, "train_loss": 2.258239375927669, "train_runtime": 6578.4214, "train_samples_per_second": 51.189, "train_steps_per_second": 1.6 } ], "max_steps": 10525, "num_train_epochs": 5, "total_flos": 2.220815486243328e+16, "trial_name": null, "trial_params": null }