{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.446343779677113, "global_step": 11000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7, "compression_loss": 0.0, "distillation_loss": 1.842069387435913, "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.7228, "step": 1, "task_loss": 0.6496906280517578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7000356083082708, "compression_loss": 0.0, "distillation_loss": 1.8232104778289795, "epoch": 0.0, "learning_rate": 5e-05, "loss": 1.6991, "step": 2, "task_loss": 0.5824222564697266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7000712081625273, "compression_loss": 0.0, "distillation_loss": 1.8167695999145508, "epoch": 0.0, "learning_rate": 4.99999977293148e-05, "loss": 1.6968, "step": 3, "task_loss": 0.6166934967041016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7001067995637733, "compression_loss": 0.0, "distillation_loss": 1.810304880142212, "epoch": 0.0, "learning_rate": 4.999999091725961e-05, "loss": 1.6919, "step": 4, "task_loss": 0.6265115737915039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7001423825130124, "compression_loss": 0.0, "distillation_loss": 1.7883914709091187, "epoch": 0.0, "learning_rate": 4.999997956383567e-05, "loss": 1.6653, "step": 5, "task_loss": 0.5578117370605469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7001779570112483, "compression_loss": 0.0, "distillation_loss": 1.4738185405731201, "epoch": 0.01, "learning_rate": 4.999996366904504e-05, "loss": 1.3741, "step": 6, "task_loss": 0.47693514823913574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7002135230594846, "compression_loss": 0.0, "distillation_loss": 1.6521902084350586, "epoch": 0.01, "learning_rate": 4.999996366904504e-05, "loss": 1.5516, "step": 7, "task_loss": 0.646312952041626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.700249080658725, "compression_loss": 0.0, "distillation_loss": 1.5596864223480225, "epoch": 0.01, "learning_rate": 4.999994323289061e-05, "loss": 1.4611, "step": 8, "task_loss": 0.5743134021759033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7002846298099733, "compression_loss": 0.0, "distillation_loss": 1.4804778099060059, "epoch": 0.01, "learning_rate": 4.999991825537609e-05, "loss": 1.3854, "step": 9, "task_loss": 0.5301415920257568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7003201705142331, "compression_loss": 0.0, "distillation_loss": 1.4417721033096313, "epoch": 0.01, "learning_rate": 4.999988873650602e-05, "loss": 1.343, "step": 10, "task_loss": 0.4539656639099121 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.700355702772508, "compression_loss": 0.0, "distillation_loss": 1.2685056924819946, "epoch": 0.01, "learning_rate": 4.999985467628575e-05, "loss": 1.1892, "step": 11, "task_loss": 0.47563648223876953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7003912265858019, "compression_loss": 0.0, "distillation_loss": 1.2336152791976929, "epoch": 0.01, "learning_rate": 4.999981607472149e-05, "loss": 1.1549, "step": 12, "task_loss": 0.44687366485595703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7004267419551183, "compression_loss": 0.0, "distillation_loss": 1.1893235445022583, "epoch": 0.01, "learning_rate": 4.999977293182023e-05, "loss": 1.1171, "step": 13, "task_loss": 0.4666560888290405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7004622488814609, "compression_loss": 0.0, "distillation_loss": 1.2539210319519043, "epoch": 0.01, "learning_rate": 4.999972524758982e-05, "loss": 1.1771, "step": 14, "task_loss": 0.48534464836120605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7004977473658335, "compression_loss": 0.0, "distillation_loss": 0.7343780994415283, "epoch": 0.01, "learning_rate": 4.999967302203893e-05, "loss": 0.6899, "step": 15, "task_loss": 0.28955867886543274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7005332374092396, "compression_loss": 0.0, "distillation_loss": 1.198635220527649, "epoch": 0.02, "learning_rate": 4.9999616255177016e-05, "loss": 1.1238, "step": 16, "task_loss": 0.4500184953212738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.700568719012683, "compression_loss": 0.0, "distillation_loss": 0.9082566499710083, "epoch": 0.02, "learning_rate": 4.999955494701443e-05, "loss": 0.851, "step": 17, "task_loss": 0.3353431224822998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7006041921771674, "compression_loss": 0.0, "distillation_loss": 1.1613315343856812, "epoch": 0.02, "learning_rate": 4.999948909756227e-05, "loss": 1.0989, "step": 18, "task_loss": 0.5371114015579224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7006396569036965, "compression_loss": 0.0, "distillation_loss": 1.3335918188095093, "epoch": 0.02, "learning_rate": 4.9999418706832525e-05, "loss": 1.2628, "step": 19, "task_loss": 0.626000165939331 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7006751131932738, "compression_loss": 0.0, "distillation_loss": 1.0266211032867432, "epoch": 0.02, "learning_rate": 4.9999343774837976e-05, "loss": 0.9634, "step": 20, "task_loss": 0.39482954144477844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7007105610469031, "compression_loss": 0.0, "distillation_loss": 0.8103340864181519, "epoch": 0.02, "learning_rate": 4.999926430159223e-05, "loss": 0.7583, "step": 21, "task_loss": 0.2896346151828766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7007460004655882, "compression_loss": 0.0, "distillation_loss": 1.3146494626998901, "epoch": 0.02, "learning_rate": 4.9999180287109725e-05, "loss": 1.2389, "step": 22, "task_loss": 0.5570744276046753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7007814314503326, "compression_loss": 0.0, "distillation_loss": 0.8772894740104675, "epoch": 0.02, "learning_rate": 4.999909173140572e-05, "loss": 0.8244, "step": 23, "task_loss": 0.3481764495372772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.70081685400214, "compression_loss": 0.0, "distillation_loss": 0.7861129641532898, "epoch": 0.02, "learning_rate": 4.999899863449631e-05, "loss": 0.736, "step": 24, "task_loss": 0.2852080166339874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7008522681220143, "compression_loss": 0.0, "distillation_loss": 1.023297667503357, "epoch": 0.02, "learning_rate": 4.99989009963984e-05, "loss": 0.9664, "step": 25, "task_loss": 0.45472821593284607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7008876738109588, "compression_loss": 0.0, "distillation_loss": 0.6092305183410645, "epoch": 0.02, "learning_rate": 4.999879881712973e-05, "loss": 0.5785, "step": 26, "task_loss": 0.3023172616958618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7009230710699776, "compression_loss": 0.0, "distillation_loss": 0.6762804985046387, "epoch": 0.03, "learning_rate": 4.999869209670885e-05, "loss": 0.6369, "step": 27, "task_loss": 0.28206872940063477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7009584599000741, "compression_loss": 0.0, "distillation_loss": 1.0091867446899414, "epoch": 0.03, "learning_rate": 4.999858083515517e-05, "loss": 0.9509, "step": 28, "task_loss": 0.4264417588710785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7009938403022521, "compression_loss": 0.0, "distillation_loss": 0.5569751262664795, "epoch": 0.03, "learning_rate": 4.999846503248888e-05, "loss": 0.5272, "step": 29, "task_loss": 0.2587721347808838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7010292122775152, "compression_loss": 0.0, "distillation_loss": 1.1096380949020386, "epoch": 0.03, "learning_rate": 4.9998344688731027e-05, "loss": 1.0493, "step": 30, "task_loss": 0.5061087608337402 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7010645758268672, "compression_loss": 0.0, "distillation_loss": 1.5996776819229126, "epoch": 0.03, "learning_rate": 4.999821980390346e-05, "loss": 1.5213, "step": 31, "task_loss": 0.8160263299942017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7010999309513116, "compression_loss": 0.0, "distillation_loss": 1.0707523822784424, "epoch": 0.03, "learning_rate": 4.999809037802888e-05, "loss": 1.0159, "step": 32, "task_loss": 0.5219181776046753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7011352776518522, "compression_loss": 0.0, "distillation_loss": 0.9755439162254333, "epoch": 0.03, "learning_rate": 4.999795641113079e-05, "loss": 0.9262, "step": 33, "task_loss": 0.4821498394012451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7011706159294928, "compression_loss": 0.0, "distillation_loss": 1.1395909786224365, "epoch": 0.03, "learning_rate": 4.9997817903233527e-05, "loss": 1.1089, "step": 34, "task_loss": 0.8322716355323792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7012059457852369, "compression_loss": 0.0, "distillation_loss": 0.9883664846420288, "epoch": 0.03, "learning_rate": 4.999767485436224e-05, "loss": 0.9305, "step": 35, "task_loss": 0.41015535593032837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7012412672200883, "compression_loss": 0.0, "distillation_loss": 0.8167462944984436, "epoch": 0.03, "learning_rate": 4.999752726454293e-05, "loss": 0.7736, "step": 36, "task_loss": 0.3853650391101837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7012765802350505, "compression_loss": 0.0, "distillation_loss": 0.7159909009933472, "epoch": 0.04, "learning_rate": 4.9997375133802415e-05, "loss": 0.6719, "step": 37, "task_loss": 0.27524322271347046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7013118848311274, "compression_loss": 0.0, "distillation_loss": 0.8945153951644897, "epoch": 0.04, "learning_rate": 4.999721846216831e-05, "loss": 0.8469, "step": 38, "task_loss": 0.4183288812637329 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7013471810093225, "compression_loss": 0.0, "distillation_loss": 0.7273141145706177, "epoch": 0.04, "learning_rate": 4.999705724966908e-05, "loss": 0.6819, "step": 39, "task_loss": 0.27272433042526245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7013824687706396, "compression_loss": 0.0, "distillation_loss": 0.5879485011100769, "epoch": 0.04, "learning_rate": 4.999689149633402e-05, "loss": 0.5481, "step": 40, "task_loss": 0.18962328135967255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7014177481160824, "compression_loss": 0.0, "distillation_loss": 0.3988691568374634, "epoch": 0.04, "learning_rate": 4.999672120219323e-05, "loss": 0.3689, "step": 41, "task_loss": 0.09930090606212616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7014530190466545, "compression_loss": 0.0, "distillation_loss": 1.1346763372421265, "epoch": 0.04, "learning_rate": 4.999654636727764e-05, "loss": 1.0762, "step": 42, "task_loss": 0.5502893924713135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7014882815633595, "compression_loss": 0.0, "distillation_loss": 1.0310869216918945, "epoch": 0.04, "learning_rate": 4.9996366991619034e-05, "loss": 0.9707, "step": 43, "task_loss": 0.42731019854545593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7015235356672014, "compression_loss": 0.0, "distillation_loss": 0.7622416615486145, "epoch": 0.04, "learning_rate": 4.999618307524997e-05, "loss": 0.7244, "step": 44, "task_loss": 0.38374945521354675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7015587813591835, "compression_loss": 0.0, "distillation_loss": 1.0716254711151123, "epoch": 0.04, "learning_rate": 4.999599461820387e-05, "loss": 1.0246, "step": 45, "task_loss": 0.6018234491348267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7015940186403098, "compression_loss": 0.0, "distillation_loss": 0.7515419721603394, "epoch": 0.04, "learning_rate": 4.999580162051497e-05, "loss": 0.7087, "step": 46, "task_loss": 0.3226703405380249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7016292475115837, "compression_loss": 0.0, "distillation_loss": 1.1621253490447998, "epoch": 0.04, "learning_rate": 4.9995604082218314e-05, "loss": 1.1132, "step": 47, "task_loss": 0.6729358434677124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7016644679740092, "compression_loss": 0.0, "distillation_loss": 0.5425492525100708, "epoch": 0.05, "learning_rate": 4.99954020033498e-05, "loss": 0.5159, "step": 48, "task_loss": 0.2755749225616455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7016996800285896, "compression_loss": 0.0, "distillation_loss": 0.7733719348907471, "epoch": 0.05, "learning_rate": 4.9995195383946135e-05, "loss": 0.7346, "step": 49, "task_loss": 0.38580086827278137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.701734883676329, "compression_loss": 0.0, "distillation_loss": 0.7129077911376953, "epoch": 0.05, "learning_rate": 4.999498422404485e-05, "loss": 0.6741, "step": 50, "task_loss": 0.3249415457248688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7017700789182307, "compression_loss": 0.0, "distillation_loss": 0.8956947922706604, "epoch": 0.05, "learning_rate": 4.999476852368431e-05, "loss": 0.8533, "step": 51, "task_loss": 0.4715935289859772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7018052657552987, "compression_loss": 0.0, "distillation_loss": 1.1765224933624268, "epoch": 0.05, "learning_rate": 4.999454828290369e-05, "loss": 1.1277, "step": 52, "task_loss": 0.6885514855384827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7018404441885364, "compression_loss": 0.0, "distillation_loss": 0.7294641137123108, "epoch": 0.05, "learning_rate": 4.999432350174299e-05, "loss": 0.6924, "step": 53, "task_loss": 0.3584412932395935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7018756142189477, "compression_loss": 0.0, "distillation_loss": 0.4732089936733246, "epoch": 0.05, "learning_rate": 4.9994094180243055e-05, "loss": 0.4407, "step": 54, "task_loss": 0.14808303117752075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7019107758475361, "compression_loss": 0.0, "distillation_loss": 0.7207993268966675, "epoch": 0.05, "learning_rate": 4.999386031844554e-05, "loss": 0.6882, "step": 55, "task_loss": 0.39489054679870605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7019459290753055, "compression_loss": 0.0, "distillation_loss": 0.7156393527984619, "epoch": 0.05, "learning_rate": 4.999362191639293e-05, "loss": 0.6781, "step": 56, "task_loss": 0.3404168486595154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7019810739032595, "compression_loss": 0.0, "distillation_loss": 0.528157651424408, "epoch": 0.05, "learning_rate": 4.999337897412852e-05, "loss": 0.4956, "step": 57, "task_loss": 0.20256038010120392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7020162103324016, "compression_loss": 0.0, "distillation_loss": 0.7788621187210083, "epoch": 0.06, "learning_rate": 4.999313149169645e-05, "loss": 0.7403, "step": 58, "task_loss": 0.3931070566177368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7020513383637357, "compression_loss": 0.0, "distillation_loss": 1.1968752145767212, "epoch": 0.06, "learning_rate": 4.999287946914169e-05, "loss": 1.1309, "step": 59, "task_loss": 0.5368590950965881 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7020864579982654, "compression_loss": 0.0, "distillation_loss": 0.7765418887138367, "epoch": 0.06, "learning_rate": 4.999262290651e-05, "loss": 0.7296, "step": 60, "task_loss": 0.3072131276130676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7021215692369944, "compression_loss": 0.0, "distillation_loss": 0.9318764209747314, "epoch": 0.06, "learning_rate": 4.9992361803847995e-05, "loss": 0.8886, "step": 61, "task_loss": 0.49911895394325256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7021566720809264, "compression_loss": 0.0, "distillation_loss": 0.6415928602218628, "epoch": 0.06, "learning_rate": 4.99920961612031e-05, "loss": 0.6061, "step": 62, "task_loss": 0.2865428924560547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.702191766531065, "compression_loss": 0.0, "distillation_loss": 0.8092690706253052, "epoch": 0.06, "learning_rate": 4.9991825978623574e-05, "loss": 0.7653, "step": 63, "task_loss": 0.3699354827404022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7022268525884139, "compression_loss": 0.0, "distillation_loss": 0.618144690990448, "epoch": 0.06, "learning_rate": 4.9991551256158495e-05, "loss": 0.5829, "step": 64, "task_loss": 0.26606857776641846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7022619302539769, "compression_loss": 0.0, "distillation_loss": 0.8759365677833557, "epoch": 0.06, "learning_rate": 4.999127199385778e-05, "loss": 0.8231, "step": 65, "task_loss": 0.3478449881076813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7022969995287576, "compression_loss": 0.0, "distillation_loss": 0.4816649556159973, "epoch": 0.06, "learning_rate": 4.999098819177214e-05, "loss": 0.4514, "step": 66, "task_loss": 0.17922161519527435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7023320604137597, "compression_loss": 0.0, "distillation_loss": 0.5684869289398193, "epoch": 0.06, "learning_rate": 4.999069984995314e-05, "loss": 0.535, "step": 67, "task_loss": 0.23375429213047028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7023671129099868, "compression_loss": 0.0, "distillation_loss": 0.7121763825416565, "epoch": 0.06, "learning_rate": 4.999040696845315e-05, "loss": 0.6668, "step": 68, "task_loss": 0.25801488757133484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7024021570184427, "compression_loss": 0.0, "distillation_loss": 0.5653649568557739, "epoch": 0.07, "learning_rate": 4.999010954732538e-05, "loss": 0.5359, "step": 69, "task_loss": 0.27088284492492676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7024371927401311, "compression_loss": 0.0, "distillation_loss": 0.8046922087669373, "epoch": 0.07, "learning_rate": 4.998980758662386e-05, "loss": 0.7569, "step": 70, "task_loss": 0.3263823986053467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7024722200760555, "compression_loss": 0.0, "distillation_loss": 0.6771596670150757, "epoch": 0.07, "learning_rate": 4.998950108640345e-05, "loss": 0.6458, "step": 71, "task_loss": 0.3636930286884308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7025072390272197, "compression_loss": 0.0, "distillation_loss": 0.41592103242874146, "epoch": 0.07, "learning_rate": 4.99891900467198e-05, "loss": 0.3859, "step": 72, "task_loss": 0.11569308489561081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7025422495946274, "compression_loss": 0.0, "distillation_loss": 0.5050232410430908, "epoch": 0.07, "learning_rate": 4.9988874467629435e-05, "loss": 0.4809, "step": 73, "task_loss": 0.26406604051589966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7025772517792822, "compression_loss": 0.0, "distillation_loss": 0.7662781476974487, "epoch": 0.07, "learning_rate": 4.998855434918968e-05, "loss": 0.7258, "step": 74, "task_loss": 0.36163684725761414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7026122455821879, "compression_loss": 0.0, "distillation_loss": 0.5249364376068115, "epoch": 0.07, "learning_rate": 4.998822969145868e-05, "loss": 0.4962, "step": 75, "task_loss": 0.23764313757419586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7026472310043481, "compression_loss": 0.0, "distillation_loss": 0.755010187625885, "epoch": 0.07, "learning_rate": 4.99879004944954e-05, "loss": 0.7195, "step": 76, "task_loss": 0.4000910520553589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7026822080467666, "compression_loss": 0.0, "distillation_loss": 0.5521838068962097, "epoch": 0.07, "learning_rate": 4.998756675835966e-05, "loss": 0.5156, "step": 77, "task_loss": 0.1858932077884674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7027171767104469, "compression_loss": 0.0, "distillation_loss": 0.7275650501251221, "epoch": 0.07, "learning_rate": 4.9987228483112083e-05, "loss": 0.694, "step": 78, "task_loss": 0.39170515537261963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7027521369963928, "compression_loss": 0.0, "distillation_loss": 0.6621497869491577, "epoch": 0.08, "learning_rate": 4.998688566881411e-05, "loss": 0.6312, "step": 79, "task_loss": 0.35260745882987976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7027870889056079, "compression_loss": 0.0, "distillation_loss": 0.8724344968795776, "epoch": 0.08, "learning_rate": 4.998653831552801e-05, "loss": 0.8453, "step": 80, "task_loss": 0.6009440422058105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.702822032439096, "compression_loss": 0.0, "distillation_loss": 0.6811240911483765, "epoch": 0.08, "learning_rate": 4.998618642331689e-05, "loss": 0.6469, "step": 81, "task_loss": 0.33839285373687744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7028569675978606, "compression_loss": 0.0, "distillation_loss": 0.5384796857833862, "epoch": 0.08, "learning_rate": 4.9985829992244675e-05, "loss": 0.508, "step": 82, "task_loss": 0.2334153652191162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7028918943829057, "compression_loss": 0.0, "distillation_loss": 0.8584867715835571, "epoch": 0.08, "learning_rate": 4.998546902237611e-05, "loss": 0.811, "step": 83, "task_loss": 0.38361459970474243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7029268127952346, "compression_loss": 0.0, "distillation_loss": 0.419251024723053, "epoch": 0.08, "learning_rate": 4.9985103513776764e-05, "loss": 0.4111, "step": 84, "task_loss": 0.3377459645271301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7029617228358512, "compression_loss": 0.0, "distillation_loss": 0.8462650179862976, "epoch": 0.08, "learning_rate": 4.998473346651303e-05, "loss": 0.8097, "step": 85, "task_loss": 0.48073211312294006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7029966245057592, "compression_loss": 0.0, "distillation_loss": 1.191063404083252, "epoch": 0.08, "learning_rate": 4.9984358880652146e-05, "loss": 1.1373, "step": 86, "task_loss": 0.653830349445343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7030315178059622, "compression_loss": 0.0, "distillation_loss": 0.8207377195358276, "epoch": 0.08, "learning_rate": 4.9983979756262136e-05, "loss": 0.7749, "step": 87, "task_loss": 0.3625227212905884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7030664027374639, "compression_loss": 0.0, "distillation_loss": 0.5961652994155884, "epoch": 0.08, "learning_rate": 4.998359609341188e-05, "loss": 0.5955, "step": 88, "task_loss": 0.589705228805542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.703101279301268, "compression_loss": 0.0, "distillation_loss": 0.7842130661010742, "epoch": 0.08, "learning_rate": 4.9983207892171074e-05, "loss": 0.7404, "step": 89, "task_loss": 0.3462582230567932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7031361474983782, "compression_loss": 0.0, "distillation_loss": 0.8764528036117554, "epoch": 0.09, "learning_rate": 4.998281515261023e-05, "loss": 0.83, "step": 90, "task_loss": 0.41157200932502747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7031710073297981, "compression_loss": 0.0, "distillation_loss": 1.0387656688690186, "epoch": 0.09, "learning_rate": 4.9982417874800704e-05, "loss": 0.9859, "step": 91, "task_loss": 0.5104459524154663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7032058587965315, "compression_loss": 0.0, "distillation_loss": 0.4702117443084717, "epoch": 0.09, "learning_rate": 4.998201605881465e-05, "loss": 0.4451, "step": 92, "task_loss": 0.2186840921640396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.703240701899582, "compression_loss": 0.0, "distillation_loss": 0.5508553981781006, "epoch": 0.09, "learning_rate": 4.9981609704725057e-05, "loss": 0.516, "step": 93, "task_loss": 0.20186278223991394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7032755366399532, "compression_loss": 0.0, "distillation_loss": 0.4743342697620392, "epoch": 0.09, "learning_rate": 4.998119881260576e-05, "loss": 0.4436, "step": 94, "task_loss": 0.16690446436405182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.703310363018649, "compression_loss": 0.0, "distillation_loss": 0.7569644451141357, "epoch": 0.09, "learning_rate": 4.9980783382531376e-05, "loss": 0.7158, "step": 95, "task_loss": 0.3451739251613617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7033451810366729, "compression_loss": 0.0, "distillation_loss": 0.6965117454528809, "epoch": 0.09, "learning_rate": 4.998036341457739e-05, "loss": 0.659, "step": 96, "task_loss": 0.32098302245140076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7033799906950287, "compression_loss": 0.0, "distillation_loss": 0.8976222276687622, "epoch": 0.09, "learning_rate": 4.997993890882008e-05, "loss": 0.8545, "step": 97, "task_loss": 0.46677258610725403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.70341479199472, "compression_loss": 0.0, "distillation_loss": 1.0592153072357178, "epoch": 0.09, "learning_rate": 4.997950986533656e-05, "loss": 1.0024, "step": 98, "task_loss": 0.4912152886390686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7034495849367505, "compression_loss": 0.0, "distillation_loss": 0.7887797355651855, "epoch": 0.09, "learning_rate": 4.997907628420477e-05, "loss": 0.7517, "step": 99, "task_loss": 0.4175463318824768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7034843695221239, "compression_loss": 0.0, "distillation_loss": 0.7304688692092896, "epoch": 0.09, "learning_rate": 4.9978638165503475e-05, "loss": 0.6888, "step": 100, "task_loss": 0.3141331076622009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7035191457518439, "compression_loss": 0.0, "distillation_loss": 0.7318391799926758, "epoch": 0.1, "learning_rate": 4.9978195509312266e-05, "loss": 0.7035, "step": 101, "task_loss": 0.4489053189754486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7035539136269141, "compression_loss": 0.0, "distillation_loss": 0.5332415699958801, "epoch": 0.1, "learning_rate": 4.997774831571154e-05, "loss": 0.5093, "step": 102, "task_loss": 0.2942197620868683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7035886731483383, "compression_loss": 0.0, "distillation_loss": 0.6230442523956299, "epoch": 0.1, "learning_rate": 4.9977296584782544e-05, "loss": 0.5832, "step": 103, "task_loss": 0.2250797152519226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7036234243171201, "compression_loss": 0.0, "distillation_loss": 0.28762802481651306, "epoch": 0.1, "learning_rate": 4.997684031660732e-05, "loss": 0.2797, "step": 104, "task_loss": 0.20829369127750397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7036581671342632, "compression_loss": 0.0, "distillation_loss": 0.7059702277183533, "epoch": 0.1, "learning_rate": 4.997637951126877e-05, "loss": 0.6714, "step": 105, "task_loss": 0.36017781496047974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7036929016007714, "compression_loss": 0.0, "distillation_loss": 0.9591817855834961, "epoch": 0.1, "learning_rate": 4.997591416885059e-05, "loss": 0.9062, "step": 106, "task_loss": 0.4291348457336426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7037276277176481, "compression_loss": 0.0, "distillation_loss": 0.5214189887046814, "epoch": 0.1, "learning_rate": 4.997544428943732e-05, "loss": 0.4899, "step": 107, "task_loss": 0.20646995306015015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7037623454858972, "compression_loss": 0.0, "distillation_loss": 0.7594773769378662, "epoch": 0.1, "learning_rate": 4.997496987311431e-05, "loss": 0.7246, "step": 108, "task_loss": 0.41065266728401184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7037970549065223, "compression_loss": 0.0, "distillation_loss": 1.0592186450958252, "epoch": 0.1, "learning_rate": 4.997449091996774e-05, "loss": 1.0102, "step": 109, "task_loss": 0.5689947605133057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7038317559805272, "compression_loss": 0.0, "distillation_loss": 0.717729389667511, "epoch": 0.1, "learning_rate": 4.9974007430084617e-05, "loss": 0.693, "step": 110, "task_loss": 0.47034698724746704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7038664487089155, "compression_loss": 0.0, "distillation_loss": 0.5633364915847778, "epoch": 0.11, "learning_rate": 4.997351940355277e-05, "loss": 0.5359, "step": 111, "task_loss": 0.2890656292438507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7039011330926908, "compression_loss": 0.0, "distillation_loss": 0.8346014618873596, "epoch": 0.11, "learning_rate": 4.997302684046085e-05, "loss": 0.7913, "step": 112, "task_loss": 0.4014682173728943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7039358091328568, "compression_loss": 0.0, "distillation_loss": 0.6383477449417114, "epoch": 0.11, "learning_rate": 4.997252974089833e-05, "loss": 0.6, "step": 113, "task_loss": 0.25466716289520264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7039704768304174, "compression_loss": 0.0, "distillation_loss": 1.142728328704834, "epoch": 0.11, "learning_rate": 4.997202810495551e-05, "loss": 1.0939, "step": 114, "task_loss": 0.6547843813896179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7040051361863761, "compression_loss": 0.0, "distillation_loss": 0.4164957106113434, "epoch": 0.11, "learning_rate": 4.997152193272353e-05, "loss": 0.391, "step": 115, "task_loss": 0.16178376972675323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7040397872017365, "compression_loss": 0.0, "distillation_loss": 0.24880273640155792, "epoch": 0.11, "learning_rate": 4.9971011224294314e-05, "loss": 0.2379, "step": 116, "task_loss": 0.14008797705173492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7040744298775025, "compression_loss": 0.0, "distillation_loss": 0.9239636659622192, "epoch": 0.11, "learning_rate": 4.997049597976066e-05, "loss": 0.884, "step": 117, "task_loss": 0.5244091749191284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7041090642146776, "compression_loss": 0.0, "distillation_loss": 0.43864983320236206, "epoch": 0.11, "learning_rate": 4.9969976199216144e-05, "loss": 0.4116, "step": 118, "task_loss": 0.16765302419662476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7041436902142656, "compression_loss": 0.0, "distillation_loss": 0.5404030084609985, "epoch": 0.11, "learning_rate": 4.9969451882755196e-05, "loss": 0.5152, "step": 119, "task_loss": 0.28787752985954285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7041783078772701, "compression_loss": 0.0, "distillation_loss": 0.4323447644710541, "epoch": 0.11, "learning_rate": 4.996892303047306e-05, "loss": 0.4133, "step": 120, "task_loss": 0.24157865345478058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7042129172046948, "compression_loss": 0.0, "distillation_loss": 0.6243703365325928, "epoch": 0.11, "learning_rate": 4.996838964246581e-05, "loss": 0.5941, "step": 121, "task_loss": 0.3215750753879547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7042475181975434, "compression_loss": 0.0, "distillation_loss": 0.7456376552581787, "epoch": 0.12, "learning_rate": 4.996785171883032e-05, "loss": 0.7073, "step": 122, "task_loss": 0.3622223436832428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7042821108568197, "compression_loss": 0.0, "distillation_loss": 0.7294185161590576, "epoch": 0.12, "learning_rate": 4.996730925966433e-05, "loss": 0.694, "step": 123, "task_loss": 0.3748391270637512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7043166951835271, "compression_loss": 0.0, "distillation_loss": 1.1894690990447998, "epoch": 0.12, "learning_rate": 4.996676226506636e-05, "loss": 1.1356, "step": 124, "task_loss": 0.6504833102226257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7043512711786696, "compression_loss": 0.0, "distillation_loss": 0.6352778673171997, "epoch": 0.12, "learning_rate": 4.9966210735135785e-05, "loss": 0.6021, "step": 125, "task_loss": 0.3033022880554199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7043858388432506, "compression_loss": 0.0, "distillation_loss": 0.4930354058742523, "epoch": 0.12, "learning_rate": 4.9965654669972794e-05, "loss": 0.4721, "step": 126, "task_loss": 0.2833815813064575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.704420398178274, "compression_loss": 0.0, "distillation_loss": 1.0228924751281738, "epoch": 0.12, "learning_rate": 4.99650940696784e-05, "loss": 0.9778, "step": 127, "task_loss": 0.571823000907898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7044549491847434, "compression_loss": 0.0, "distillation_loss": 0.5655504465103149, "epoch": 0.12, "learning_rate": 4.996452893435442e-05, "loss": 0.5301, "step": 128, "task_loss": 0.21055805683135986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7044894918636624, "compression_loss": 0.0, "distillation_loss": 0.6770893335342407, "epoch": 0.12, "learning_rate": 4.9963959264103544e-05, "loss": 0.6397, "step": 129, "task_loss": 0.30343693494796753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7045240262160347, "compression_loss": 0.0, "distillation_loss": 0.4913747012615204, "epoch": 0.12, "learning_rate": 4.996338505902924e-05, "loss": 0.4605, "step": 130, "task_loss": 0.1823093295097351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7045585522428641, "compression_loss": 0.0, "distillation_loss": 0.5457890033721924, "epoch": 0.12, "learning_rate": 4.996280631923581e-05, "loss": 0.5186, "step": 131, "task_loss": 0.2735254168510437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7045930699451544, "compression_loss": 0.0, "distillation_loss": 0.2785548269748688, "epoch": 0.13, "learning_rate": 4.9962223044828396e-05, "loss": 0.2587, "step": 132, "task_loss": 0.08010871708393097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7046275793239088, "compression_loss": 0.0, "distillation_loss": 0.49103331565856934, "epoch": 0.13, "learning_rate": 4.9961635235912935e-05, "loss": 0.4746, "step": 133, "task_loss": 0.32637351751327515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7046620803801315, "compression_loss": 0.0, "distillation_loss": 0.3778892755508423, "epoch": 0.13, "learning_rate": 4.9961042892596225e-05, "loss": 0.3622, "step": 134, "task_loss": 0.2211739420890808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7046965731148259, "compression_loss": 0.0, "distillation_loss": 0.4187111258506775, "epoch": 0.13, "learning_rate": 4.996044601498586e-05, "loss": 0.3963, "step": 135, "task_loss": 0.1942349374294281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7047310575289957, "compression_loss": 0.0, "distillation_loss": 0.5608755350112915, "epoch": 0.13, "learning_rate": 4.995984460319026e-05, "loss": 0.5274, "step": 136, "task_loss": 0.22652751207351685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7047655336236447, "compression_loss": 0.0, "distillation_loss": 0.642856776714325, "epoch": 0.13, "learning_rate": 4.995923865731869e-05, "loss": 0.6072, "step": 137, "task_loss": 0.28608477115631104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7048000013997765, "compression_loss": 0.0, "distillation_loss": 0.6759918928146362, "epoch": 0.13, "learning_rate": 4.9958628177481195e-05, "loss": 0.6456, "step": 138, "task_loss": 0.37211018800735474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7048344608583947, "compression_loss": 0.0, "distillation_loss": 0.5849887728691101, "epoch": 0.13, "learning_rate": 4.99580131637887e-05, "loss": 0.5497, "step": 139, "task_loss": 0.23251289129257202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7048689120005032, "compression_loss": 0.0, "distillation_loss": 0.5638033151626587, "epoch": 0.13, "learning_rate": 4.995739361635292e-05, "loss": 0.5441, "step": 140, "task_loss": 0.367082804441452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7049033548271055, "compression_loss": 0.0, "distillation_loss": 0.4791497588157654, "epoch": 0.13, "learning_rate": 4.9956769535286385e-05, "loss": 0.4515, "step": 141, "task_loss": 0.20264172554016113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7049377893392055, "compression_loss": 0.0, "distillation_loss": 0.45656701922416687, "epoch": 0.13, "learning_rate": 4.9956140920702476e-05, "loss": 0.4341, "step": 142, "task_loss": 0.23189952969551086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7049722155378065, "compression_loss": 0.0, "distillation_loss": 0.5628130435943604, "epoch": 0.14, "learning_rate": 4.995550777271538e-05, "loss": 0.5244, "step": 143, "task_loss": 0.1786140650510788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7050066334239125, "compression_loss": 0.0, "distillation_loss": 0.8593398332595825, "epoch": 0.14, "learning_rate": 4.995487009144011e-05, "loss": 0.817, "step": 144, "task_loss": 0.43626001477241516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7050410429985271, "compression_loss": 0.0, "distillation_loss": 0.6192554235458374, "epoch": 0.14, "learning_rate": 4.99542278769925e-05, "loss": 0.5859, "step": 145, "task_loss": 0.2856113016605377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7050754442626539, "compression_loss": 0.0, "distillation_loss": 0.46989959478378296, "epoch": 0.14, "learning_rate": 4.995358112948921e-05, "loss": 0.4416, "step": 146, "task_loss": 0.18703171610832214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7051098372172967, "compression_loss": 0.0, "distillation_loss": 0.7966834902763367, "epoch": 0.14, "learning_rate": 4.9952929849047734e-05, "loss": 0.754, "step": 147, "task_loss": 0.3696812093257904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7051442218634592, "compression_loss": 0.0, "distillation_loss": 0.8801038265228271, "epoch": 0.14, "learning_rate": 4.9952274035786385e-05, "loss": 0.8282, "step": 148, "task_loss": 0.3614765405654907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7051785982021449, "compression_loss": 0.0, "distillation_loss": 0.8862295150756836, "epoch": 0.14, "learning_rate": 4.9951613689824276e-05, "loss": 0.8481, "step": 149, "task_loss": 0.5047279596328735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7052129662343577, "compression_loss": 0.0, "distillation_loss": 0.7615218162536621, "epoch": 0.14, "learning_rate": 4.995094881128138e-05, "loss": 0.7294, "step": 150, "task_loss": 0.4406706392765045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7052473259611011, "compression_loss": 0.0, "distillation_loss": 0.5523242950439453, "epoch": 0.14, "learning_rate": 4.995027940027846e-05, "loss": 0.5313, "step": 151, "task_loss": 0.3420305848121643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7052816773833789, "compression_loss": 0.0, "distillation_loss": 0.43813109397888184, "epoch": 0.14, "learning_rate": 4.9949605456937135e-05, "loss": 0.4062, "step": 152, "task_loss": 0.11931411176919937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7053160205021947, "compression_loss": 0.0, "distillation_loss": 0.6433530449867249, "epoch": 0.15, "learning_rate": 4.994892698137981e-05, "loss": 0.608, "step": 153, "task_loss": 0.2893901765346527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7053503553185523, "compression_loss": 0.0, "distillation_loss": 0.6064528226852417, "epoch": 0.15, "learning_rate": 4.9948243973729745e-05, "loss": 0.5714, "step": 154, "task_loss": 0.25594377517700195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7053846818334553, "compression_loss": 0.0, "distillation_loss": 0.7984007000923157, "epoch": 0.15, "learning_rate": 4.994755643411101e-05, "loss": 0.7624, "step": 155, "task_loss": 0.43833673000335693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7054190000479073, "compression_loss": 0.0, "distillation_loss": 0.5333908796310425, "epoch": 0.15, "learning_rate": 4.9946864362648506e-05, "loss": 0.5041, "step": 156, "task_loss": 0.240193709731102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7054533099629121, "compression_loss": 0.0, "distillation_loss": 0.5313940048217773, "epoch": 0.15, "learning_rate": 4.994616775946794e-05, "loss": 0.5092, "step": 157, "task_loss": 0.3092755973339081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7054876115794735, "compression_loss": 0.0, "distillation_loss": 0.6694386005401611, "epoch": 0.15, "learning_rate": 4.994546662469586e-05, "loss": 0.6325, "step": 158, "task_loss": 0.3002847731113434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7055219048985948, "compression_loss": 0.0, "distillation_loss": 0.43738991022109985, "epoch": 0.15, "learning_rate": 4.9944760958459624e-05, "loss": 0.4088, "step": 159, "task_loss": 0.15195739269256592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7055561899212801, "compression_loss": 0.0, "distillation_loss": 0.6324198842048645, "epoch": 0.15, "learning_rate": 4.994405076088743e-05, "loss": 0.6241, "step": 160, "task_loss": 0.5489103198051453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7055904666485329, "compression_loss": 0.0, "distillation_loss": 0.4191284775733948, "epoch": 0.15, "learning_rate": 4.994333603210829e-05, "loss": 0.4006, "step": 161, "task_loss": 0.23342673480510712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7056247350813567, "compression_loss": 0.0, "distillation_loss": 0.5982824563980103, "epoch": 0.15, "learning_rate": 4.9942616772252016e-05, "loss": 0.5746, "step": 162, "task_loss": 0.3616164028644562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7056589952207556, "compression_loss": 0.0, "distillation_loss": 0.279816210269928, "epoch": 0.15, "learning_rate": 4.994189298144929e-05, "loss": 0.2647, "step": 163, "task_loss": 0.12842896580696106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7056932470677328, "compression_loss": 0.0, "distillation_loss": 0.26615631580352783, "epoch": 0.16, "learning_rate": 4.994116465983158e-05, "loss": 0.2481, "step": 164, "task_loss": 0.08531204611063004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7057274906232924, "compression_loss": 0.0, "distillation_loss": 0.7699984908103943, "epoch": 0.16, "learning_rate": 4.99404318075312e-05, "loss": 0.7333, "step": 165, "task_loss": 0.4031785726547241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7057617258884379, "compression_loss": 0.0, "distillation_loss": 0.32784250378608704, "epoch": 0.16, "learning_rate": 4.993969442468125e-05, "loss": 0.3131, "step": 166, "task_loss": 0.1799994260072708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7057959528641731, "compression_loss": 0.0, "distillation_loss": 0.5546020269393921, "epoch": 0.16, "learning_rate": 4.993895251141571e-05, "loss": 0.5365, "step": 167, "task_loss": 0.37407755851745605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7058301715515014, "compression_loss": 0.0, "distillation_loss": 0.24405278265476227, "epoch": 0.16, "learning_rate": 4.9938206067869334e-05, "loss": 0.2312, "step": 168, "task_loss": 0.11586499214172363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7058643819514266, "compression_loss": 0.0, "distillation_loss": 0.897857666015625, "epoch": 0.16, "learning_rate": 4.993745509417772e-05, "loss": 0.8501, "step": 169, "task_loss": 0.42022186517715454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7058985840649525, "compression_loss": 0.0, "distillation_loss": 0.62211012840271, "epoch": 0.16, "learning_rate": 4.9936699590477296e-05, "loss": 0.5941, "step": 170, "task_loss": 0.3424464166164398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7059327778930828, "compression_loss": 0.0, "distillation_loss": 0.4366634488105774, "epoch": 0.16, "learning_rate": 4.9935939556905295e-05, "loss": 0.4205, "step": 171, "task_loss": 0.27515465021133423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7059669634368212, "compression_loss": 0.0, "distillation_loss": 0.4915463626384735, "epoch": 0.16, "learning_rate": 4.993517499359978e-05, "loss": 0.4641, "step": 172, "task_loss": 0.2167641520500183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7060011406971711, "compression_loss": 0.0, "distillation_loss": 0.5092770457267761, "epoch": 0.16, "learning_rate": 4.993440590069963e-05, "loss": 0.4841, "step": 173, "task_loss": 0.257138729095459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7060353096751364, "compression_loss": 0.0, "distillation_loss": 0.5051209926605225, "epoch": 0.17, "learning_rate": 4.993363227834457e-05, "loss": 0.4766, "step": 174, "task_loss": 0.22033719718456268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7060694703717209, "compression_loss": 0.0, "distillation_loss": 0.6676339507102966, "epoch": 0.17, "learning_rate": 4.9932854126675124e-05, "loss": 0.6391, "step": 175, "task_loss": 0.3822685778141022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7061036227879279, "compression_loss": 0.0, "distillation_loss": 0.2579382061958313, "epoch": 0.17, "learning_rate": 4.993207144583264e-05, "loss": 0.2493, "step": 176, "task_loss": 0.17196613550186157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7061377669247615, "compression_loss": 0.0, "distillation_loss": 0.39016398787498474, "epoch": 0.17, "learning_rate": 4.993128423595931e-05, "loss": 0.3674, "step": 177, "task_loss": 0.16221883893013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7061719027832251, "compression_loss": 0.0, "distillation_loss": 0.5886770486831665, "epoch": 0.17, "learning_rate": 4.9930492497198125e-05, "loss": 0.5569, "step": 178, "task_loss": 0.2707720994949341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7062060303643225, "compression_loss": 0.0, "distillation_loss": 0.31564319133758545, "epoch": 0.17, "learning_rate": 4.992969622969292e-05, "loss": 0.3039, "step": 179, "task_loss": 0.19809630513191223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7062401496690573, "compression_loss": 0.0, "distillation_loss": 0.35320743918418884, "epoch": 0.17, "learning_rate": 4.992889543358832e-05, "loss": 0.3371, "step": 180, "task_loss": 0.19164326786994934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7062742606984334, "compression_loss": 0.0, "distillation_loss": 0.8013370633125305, "epoch": 0.17, "learning_rate": 4.9928090109029817e-05, "loss": 0.7688, "step": 181, "task_loss": 0.4764384627342224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7063083634534542, "compression_loss": 0.0, "distillation_loss": 1.4201560020446777, "epoch": 0.17, "learning_rate": 4.9927280256163686e-05, "loss": 1.3439, "step": 182, "task_loss": 0.6578447222709656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7063424579351235, "compression_loss": 0.0, "distillation_loss": 0.35098546743392944, "epoch": 0.17, "learning_rate": 4.992646587513705e-05, "loss": 0.3229, "step": 183, "task_loss": 0.07008456438779831 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7063765441444451, "compression_loss": 0.0, "distillation_loss": 0.6404779553413391, "epoch": 0.17, "learning_rate": 4.9925646966097835e-05, "loss": 0.6098, "step": 184, "task_loss": 0.3341161012649536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7064106220824224, "compression_loss": 0.0, "distillation_loss": 0.4935486912727356, "epoch": 0.18, "learning_rate": 4.99248235291948e-05, "loss": 0.4679, "step": 185, "task_loss": 0.2367597073316574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7064446917500594, "compression_loss": 0.0, "distillation_loss": 0.3535561263561249, "epoch": 0.18, "learning_rate": 4.9923995564577544e-05, "loss": 0.3314, "step": 186, "task_loss": 0.13228961825370789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7064787531483596, "compression_loss": 0.0, "distillation_loss": 0.6032556295394897, "epoch": 0.18, "learning_rate": 4.992316307239645e-05, "loss": 0.572, "step": 187, "task_loss": 0.2907094657421112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7065128062783267, "compression_loss": 0.0, "distillation_loss": 1.0592761039733887, "epoch": 0.18, "learning_rate": 4.992232605280276e-05, "loss": 1.0138, "step": 188, "task_loss": 0.6046409606933594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7065468511409644, "compression_loss": 0.0, "distillation_loss": 0.32051318883895874, "epoch": 0.18, "learning_rate": 4.992148450594851e-05, "loss": 0.3152, "step": 189, "task_loss": 0.26700055599212646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7065808877372763, "compression_loss": 0.0, "distillation_loss": 0.4926970899105072, "epoch": 0.18, "learning_rate": 4.9920638431986574e-05, "loss": 0.4646, "step": 190, "task_loss": 0.21135534346103668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7066149160682663, "compression_loss": 0.0, "distillation_loss": 0.7110521793365479, "epoch": 0.18, "learning_rate": 4.991978783107065e-05, "loss": 0.6783, "step": 191, "task_loss": 0.38350385427474976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.706648936134938, "compression_loss": 0.0, "distillation_loss": 0.4119304120540619, "epoch": 0.18, "learning_rate": 4.9918932703355256e-05, "loss": 0.3859, "step": 192, "task_loss": 0.15174490213394165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7066829479382948, "compression_loss": 0.0, "distillation_loss": 0.5923248529434204, "epoch": 0.18, "learning_rate": 4.991807304899572e-05, "loss": 0.5592, "step": 193, "task_loss": 0.26117557287216187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7067169514793408, "compression_loss": 0.0, "distillation_loss": 0.41325411200523376, "epoch": 0.18, "learning_rate": 4.991720886814821e-05, "loss": 0.3904, "step": 194, "task_loss": 0.18504031002521515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7067509467590795, "compression_loss": 0.0, "distillation_loss": 0.4917716085910797, "epoch": 0.19, "learning_rate": 4.99163401609697e-05, "loss": 0.4618, "step": 195, "task_loss": 0.1917870044708252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7067849337785144, "compression_loss": 0.0, "distillation_loss": 0.48008596897125244, "epoch": 0.19, "learning_rate": 4.991546692761801e-05, "loss": 0.4578, "step": 196, "task_loss": 0.25695863366127014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7068189125386495, "compression_loss": 0.0, "distillation_loss": 0.8246874213218689, "epoch": 0.19, "learning_rate": 4.991458916825176e-05, "loss": 0.7932, "step": 197, "task_loss": 0.5102267265319824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7068528830404883, "compression_loss": 0.0, "distillation_loss": 0.49332180619239807, "epoch": 0.19, "learning_rate": 4.991370688303039e-05, "loss": 0.4696, "step": 198, "task_loss": 0.25608059763908386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7068868452850345, "compression_loss": 0.0, "distillation_loss": 0.7726448178291321, "epoch": 0.19, "learning_rate": 4.9912820072114185e-05, "loss": 0.7387, "step": 199, "task_loss": 0.43347033858299255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7069207992732918, "compression_loss": 0.0, "distillation_loss": 0.3915247917175293, "epoch": 0.19, "learning_rate": 4.9911928735664224e-05, "loss": 0.368, "step": 200, "task_loss": 0.1565304845571518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7069547450062639, "compression_loss": 0.0, "distillation_loss": 0.39359933137893677, "epoch": 0.19, "learning_rate": 4.991103287384244e-05, "loss": 0.3707, "step": 201, "task_loss": 0.1646425426006317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7069886824849545, "compression_loss": 0.0, "distillation_loss": 0.30474036931991577, "epoch": 0.19, "learning_rate": 4.9910132486811555e-05, "loss": 0.2895, "step": 202, "task_loss": 0.15248741209506989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7070226117103672, "compression_loss": 0.0, "distillation_loss": 0.5055716037750244, "epoch": 0.19, "learning_rate": 4.990922757473514e-05, "loss": 0.4763, "step": 203, "task_loss": 0.21311217546463013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7070565326835058, "compression_loss": 0.0, "distillation_loss": 0.6619958877563477, "epoch": 0.19, "learning_rate": 4.990831813777757e-05, "loss": 0.6328, "step": 204, "task_loss": 0.36974024772644043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7070904454053738, "compression_loss": 0.0, "distillation_loss": 0.809003472328186, "epoch": 0.19, "learning_rate": 4.990740417610406e-05, "loss": 0.7783, "step": 205, "task_loss": 0.5022717714309692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.707124349876975, "compression_loss": 0.0, "distillation_loss": 0.9417250156402588, "epoch": 0.2, "learning_rate": 4.9906485689880613e-05, "loss": 0.898, "step": 206, "task_loss": 0.5048108100891113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7071582460993132, "compression_loss": 0.0, "distillation_loss": 0.4026396870613098, "epoch": 0.2, "learning_rate": 4.9905562679274096e-05, "loss": 0.3856, "step": 207, "task_loss": 0.23229239881038666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7071921340733919, "compression_loss": 0.0, "distillation_loss": 0.45894497632980347, "epoch": 0.2, "learning_rate": 4.9904635144452164e-05, "loss": 0.4355, "step": 208, "task_loss": 0.22411760687828064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7072260138002149, "compression_loss": 0.0, "distillation_loss": 0.5892167091369629, "epoch": 0.2, "learning_rate": 4.990370308558332e-05, "loss": 0.5551, "step": 209, "task_loss": 0.24835461378097534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7072598852807856, "compression_loss": 0.0, "distillation_loss": 0.5774100422859192, "epoch": 0.2, "learning_rate": 4.9902766502836874e-05, "loss": 0.5463, "step": 210, "task_loss": 0.2667173445224762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7072937485161082, "compression_loss": 0.0, "distillation_loss": 0.33134809136390686, "epoch": 0.2, "learning_rate": 4.9901825396382965e-05, "loss": 0.3096, "step": 211, "task_loss": 0.11349479109048843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7073276035071859, "compression_loss": 0.0, "distillation_loss": 0.46322399377822876, "epoch": 0.2, "learning_rate": 4.990087976639254e-05, "loss": 0.4346, "step": 212, "task_loss": 0.17661762237548828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7073614502550227, "compression_loss": 0.0, "distillation_loss": 0.4679402709007263, "epoch": 0.2, "learning_rate": 4.989992961303738e-05, "loss": 0.445, "step": 213, "task_loss": 0.23898877203464508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7073952887606221, "compression_loss": 0.0, "distillation_loss": 0.44021981954574585, "epoch": 0.2, "learning_rate": 4.989897493649008e-05, "loss": 0.43, "step": 214, "task_loss": 0.3383277654647827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7074291190249877, "compression_loss": 0.0, "distillation_loss": 0.5056917071342468, "epoch": 0.2, "learning_rate": 4.989801573692408e-05, "loss": 0.4787, "step": 215, "task_loss": 0.23537424206733704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7074629410491236, "compression_loss": 0.0, "distillation_loss": 0.7158606052398682, "epoch": 0.21, "learning_rate": 4.989705201451361e-05, "loss": 0.6868, "step": 216, "task_loss": 0.4253765344619751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.707496754834033, "compression_loss": 0.0, "distillation_loss": 0.4985395669937134, "epoch": 0.21, "learning_rate": 4.989608376943373e-05, "loss": 0.4697, "step": 217, "task_loss": 0.21007469296455383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7075305603807197, "compression_loss": 0.0, "distillation_loss": 0.5073490738868713, "epoch": 0.21, "learning_rate": 4.9895111001860335e-05, "loss": 0.4779, "step": 218, "task_loss": 0.21258264780044556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7075643576901876, "compression_loss": 0.0, "distillation_loss": 0.37614893913269043, "epoch": 0.21, "learning_rate": 4.989413371197013e-05, "loss": 0.3534, "step": 219, "task_loss": 0.14843641221523285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7075981467634402, "compression_loss": 0.0, "distillation_loss": 0.6342090368270874, "epoch": 0.21, "learning_rate": 4.989315189994065e-05, "loss": 0.6009, "step": 220, "task_loss": 0.3015548586845398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7076319276014813, "compression_loss": 0.0, "distillation_loss": 0.7175476551055908, "epoch": 0.21, "learning_rate": 4.9892165565950235e-05, "loss": 0.6756, "step": 221, "task_loss": 0.2982741594314575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7076657002053144, "compression_loss": 0.0, "distillation_loss": 0.40784966945648193, "epoch": 0.21, "learning_rate": 4.9891174710178054e-05, "loss": 0.389, "step": 222, "task_loss": 0.21939508616924286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7076994645759433, "compression_loss": 0.0, "distillation_loss": 0.5739084482192993, "epoch": 0.21, "learning_rate": 4.9890179332804125e-05, "loss": 0.5553, "step": 223, "task_loss": 0.38766252994537354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7077332207143717, "compression_loss": 0.0, "distillation_loss": 0.44305968284606934, "epoch": 0.21, "learning_rate": 4.988917943400924e-05, "loss": 0.4178, "step": 224, "task_loss": 0.19036653637886047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7077669686216033, "compression_loss": 0.0, "distillation_loss": 0.714052140712738, "epoch": 0.21, "learning_rate": 4.988817501397505e-05, "loss": 0.6902, "step": 225, "task_loss": 0.4752587080001831 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7078007082986417, "compression_loss": 0.0, "distillation_loss": 0.36877432465553284, "epoch": 0.21, "learning_rate": 4.9887166072884e-05, "loss": 0.3439, "step": 226, "task_loss": 0.11986754834651947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7078344397464906, "compression_loss": 0.0, "distillation_loss": 0.5401185750961304, "epoch": 0.22, "learning_rate": 4.988615261091938e-05, "loss": 0.5173, "step": 227, "task_loss": 0.31204986572265625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7078681629661537, "compression_loss": 0.0, "distillation_loss": 0.6624010801315308, "epoch": 0.22, "learning_rate": 4.9885134628265276e-05, "loss": 0.6317, "step": 228, "task_loss": 0.3551117479801178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7079018779586348, "compression_loss": 0.0, "distillation_loss": 0.5041351318359375, "epoch": 0.22, "learning_rate": 4.988411212510663e-05, "loss": 0.4863, "step": 229, "task_loss": 0.3257242739200592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7079355847249373, "compression_loss": 0.0, "distillation_loss": 0.5081358551979065, "epoch": 0.22, "learning_rate": 4.988308510162917e-05, "loss": 0.4942, "step": 230, "task_loss": 0.36878862977027893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7079692832660651, "compression_loss": 0.0, "distillation_loss": 0.4084128737449646, "epoch": 0.22, "learning_rate": 4.988205355801945e-05, "loss": 0.3874, "step": 231, "task_loss": 0.19869351387023926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7080029735830219, "compression_loss": 0.0, "distillation_loss": 0.6727085113525391, "epoch": 0.22, "learning_rate": 4.988101749446488e-05, "loss": 0.6462, "step": 232, "task_loss": 0.40740063786506653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7080366556768113, "compression_loss": 0.0, "distillation_loss": 0.3574530780315399, "epoch": 0.22, "learning_rate": 4.987997691115366e-05, "loss": 0.3417, "step": 233, "task_loss": 0.20038984715938568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.708070329548437, "compression_loss": 0.0, "distillation_loss": 0.6499674916267395, "epoch": 0.22, "learning_rate": 4.98789318082748e-05, "loss": 0.6204, "step": 234, "task_loss": 0.3542225658893585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7081039951989025, "compression_loss": 0.0, "distillation_loss": 0.2822619080543518, "epoch": 0.22, "learning_rate": 4.987788218601816e-05, "loss": 0.2671, "step": 235, "task_loss": 0.13111086189746857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7081376526292118, "compression_loss": 0.0, "distillation_loss": 0.3758002519607544, "epoch": 0.22, "learning_rate": 4.987682804457441e-05, "loss": 0.3537, "step": 236, "task_loss": 0.15476220846176147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7081713018403685, "compression_loss": 0.0, "distillation_loss": 0.42200687527656555, "epoch": 0.23, "learning_rate": 4.987576938413504e-05, "loss": 0.4044, "step": 237, "task_loss": 0.24637091159820557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7082049428333762, "compression_loss": 0.0, "distillation_loss": 0.3074836730957031, "epoch": 0.23, "learning_rate": 4.987470620489235e-05, "loss": 0.2978, "step": 238, "task_loss": 0.21041469275951385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7082385756092385, "compression_loss": 0.0, "distillation_loss": 0.19442984461784363, "epoch": 0.23, "learning_rate": 4.9873638507039486e-05, "loss": 0.1873, "step": 239, "task_loss": 0.12338165193796158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7082722001689592, "compression_loss": 0.0, "distillation_loss": 0.3584289252758026, "epoch": 0.23, "learning_rate": 4.987256629077039e-05, "loss": 0.34, "step": 240, "task_loss": 0.17371408641338348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7083058165135421, "compression_loss": 0.0, "distillation_loss": 0.8099121451377869, "epoch": 0.23, "learning_rate": 4.987148955627985e-05, "loss": 0.7633, "step": 241, "task_loss": 0.34426361322402954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7083394246439906, "compression_loss": 0.0, "distillation_loss": 0.293813019990921, "epoch": 0.23, "learning_rate": 4.987040830376344e-05, "loss": 0.2808, "step": 242, "task_loss": 0.16348132491111755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7083730245613087, "compression_loss": 0.0, "distillation_loss": 0.5357306599617004, "epoch": 0.23, "learning_rate": 4.9869322533417596e-05, "loss": 0.504, "step": 243, "task_loss": 0.21806874871253967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7084066162664997, "compression_loss": 0.0, "distillation_loss": 0.6220539212226868, "epoch": 0.23, "learning_rate": 4.9868232245439525e-05, "loss": 0.5916, "step": 244, "task_loss": 0.3177708387374878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7084401997605676, "compression_loss": 0.0, "distillation_loss": 0.49369296431541443, "epoch": 0.23, "learning_rate": 4.986713744002731e-05, "loss": 0.4649, "step": 245, "task_loss": 0.20582452416419983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.708473775044516, "compression_loss": 0.0, "distillation_loss": 0.46345996856689453, "epoch": 0.23, "learning_rate": 4.9866038117379824e-05, "loss": 0.4486, "step": 246, "task_loss": 0.31531885266304016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7085073421193486, "compression_loss": 0.0, "distillation_loss": 0.32199227809906006, "epoch": 0.23, "learning_rate": 4.986493427769675e-05, "loss": 0.3051, "step": 247, "task_loss": 0.15327921509742737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.708540900986069, "compression_loss": 0.0, "distillation_loss": 0.4429311156272888, "epoch": 0.24, "learning_rate": 4.986382592117861e-05, "loss": 0.4221, "step": 248, "task_loss": 0.2345043569803238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7085744516456809, "compression_loss": 0.0, "distillation_loss": 0.47061067819595337, "epoch": 0.24, "learning_rate": 4.986271304802675e-05, "loss": 0.4492, "step": 249, "task_loss": 0.2561434507369995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7086079940991881, "compression_loss": 0.0, "distillation_loss": 0.4610680341720581, "epoch": 0.24, "learning_rate": 4.986159565844333e-05, "loss": 0.4471, "step": 250, "task_loss": 0.32182347774505615 }, { "epoch": 0.24, "eval_accuracy": 0.8532110091743119, "eval_loss": 0.6153932809829712, "eval_runtime": 14.8163, "eval_samples_per_second": 58.854, "eval_steps_per_second": 7.357, "step": 250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.708641528347594, "compression_loss": 0.0, "distillation_loss": 0.53682541847229, "epoch": 0.24, "learning_rate": 4.986047375263131e-05, "loss": 0.513, "step": 251, "task_loss": 0.2982673645019531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7086750543919026, "compression_loss": 0.0, "distillation_loss": 0.29910629987716675, "epoch": 0.24, "learning_rate": 4.9859347330794515e-05, "loss": 0.2856, "step": 252, "task_loss": 0.16408580541610718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7087085722331175, "compression_loss": 0.0, "distillation_loss": 0.49715662002563477, "epoch": 0.24, "learning_rate": 4.985821639313755e-05, "loss": 0.4719, "step": 253, "task_loss": 0.24493193626403809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7087420818722422, "compression_loss": 0.0, "distillation_loss": 0.6056504249572754, "epoch": 0.24, "learning_rate": 4.985708093986586e-05, "loss": 0.5746, "step": 254, "task_loss": 0.29507917165756226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7087755833102806, "compression_loss": 0.0, "distillation_loss": 0.5323020219802856, "epoch": 0.24, "learning_rate": 4.98559409711857e-05, "loss": 0.5011, "step": 255, "task_loss": 0.2197890430688858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7088090765482363, "compression_loss": 0.0, "distillation_loss": 0.5494606494903564, "epoch": 0.24, "learning_rate": 4.985479648730416e-05, "loss": 0.5286, "step": 256, "task_loss": 0.3407706320285797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.708842561587113, "compression_loss": 0.0, "distillation_loss": 0.35420656204223633, "epoch": 0.24, "learning_rate": 4.985364748842914e-05, "loss": 0.3456, "step": 257, "task_loss": 0.2684999108314514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7088760384279144, "compression_loss": 0.0, "distillation_loss": 0.4922611713409424, "epoch": 0.25, "learning_rate": 4.985249397476934e-05, "loss": 0.4717, "step": 258, "task_loss": 0.2866893410682678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.708909507071644, "compression_loss": 0.0, "distillation_loss": 0.34109288454055786, "epoch": 0.25, "learning_rate": 4.985133594653434e-05, "loss": 0.3198, "step": 259, "task_loss": 0.12784980237483978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7089429675193057, "compression_loss": 0.0, "distillation_loss": 0.3204500675201416, "epoch": 0.25, "learning_rate": 4.9850173403934466e-05, "loss": 0.3082, "step": 260, "task_loss": 0.198293536901474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7089764197719032, "compression_loss": 0.0, "distillation_loss": 0.5372319221496582, "epoch": 0.25, "learning_rate": 4.9849006347180915e-05, "loss": 0.5081, "step": 261, "task_loss": 0.24620920419692993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.70900986383044, "compression_loss": 0.0, "distillation_loss": 0.31807684898376465, "epoch": 0.25, "learning_rate": 4.9847834776485694e-05, "loss": 0.3002, "step": 262, "task_loss": 0.13968217372894287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7090432996959198, "compression_loss": 0.0, "distillation_loss": 0.3310486078262329, "epoch": 0.25, "learning_rate": 4.984665869206161e-05, "loss": 0.3205, "step": 263, "task_loss": 0.22510308027267456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7090767273693465, "compression_loss": 0.0, "distillation_loss": 0.15543615818023682, "epoch": 0.25, "learning_rate": 4.984547809412231e-05, "loss": 0.15, "step": 264, "task_loss": 0.10149666666984558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7091101468517236, "compression_loss": 0.0, "distillation_loss": 0.4099940061569214, "epoch": 0.25, "learning_rate": 4.984429298288227e-05, "loss": 0.385, "step": 265, "task_loss": 0.1603960394859314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7091435581440548, "compression_loss": 0.0, "distillation_loss": 0.6284467577934265, "epoch": 0.25, "learning_rate": 4.984310335855674e-05, "loss": 0.5938, "step": 266, "task_loss": 0.28220003843307495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7091769612473438, "compression_loss": 0.0, "distillation_loss": 0.4136122167110443, "epoch": 0.25, "learning_rate": 4.9841909221361855e-05, "loss": 0.3988, "step": 267, "task_loss": 0.26578381657600403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7092103561625943, "compression_loss": 0.0, "distillation_loss": 0.5570213794708252, "epoch": 0.25, "learning_rate": 4.9840710571514515e-05, "loss": 0.5466, "step": 268, "task_loss": 0.4525538682937622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7092437428908099, "compression_loss": 0.0, "distillation_loss": 0.4353582561016083, "epoch": 0.26, "learning_rate": 4.9839507409232464e-05, "loss": 0.4121, "step": 269, "task_loss": 0.20239636301994324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7092771214329945, "compression_loss": 0.0, "distillation_loss": 0.5894170999526978, "epoch": 0.26, "learning_rate": 4.983829973473426e-05, "loss": 0.5694, "step": 270, "task_loss": 0.38912904262542725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7093104917901515, "compression_loss": 0.0, "distillation_loss": 0.5201334953308105, "epoch": 0.26, "learning_rate": 4.983708754823929e-05, "loss": 0.4966, "step": 271, "task_loss": 0.284492552280426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7093438539632848, "compression_loss": 0.0, "distillation_loss": 0.4675508737564087, "epoch": 0.26, "learning_rate": 4.983587084996776e-05, "loss": 0.4469, "step": 272, "task_loss": 0.26147744059562683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.709377207953398, "compression_loss": 0.0, "distillation_loss": 0.45866286754608154, "epoch": 0.26, "learning_rate": 4.9834649640140664e-05, "loss": 0.4508, "step": 273, "task_loss": 0.37955737113952637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7094105537614946, "compression_loss": 0.0, "distillation_loss": 0.4817972183227539, "epoch": 0.26, "learning_rate": 4.9833423918979864e-05, "loss": 0.4607, "step": 274, "task_loss": 0.2709713578224182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7094438913885787, "compression_loss": 0.0, "distillation_loss": 0.3562793433666229, "epoch": 0.26, "learning_rate": 4.983219368670801e-05, "loss": 0.3413, "step": 275, "task_loss": 0.20625412464141846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7094772208356536, "compression_loss": 0.0, "distillation_loss": 0.45571157336235046, "epoch": 0.26, "learning_rate": 4.983095894354858e-05, "loss": 0.4336, "step": 276, "task_loss": 0.2345903366804123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7095105421037231, "compression_loss": 0.0, "distillation_loss": 0.22800546884536743, "epoch": 0.26, "learning_rate": 4.9829719689725865e-05, "loss": 0.2207, "step": 277, "task_loss": 0.1548459529876709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.709543855193791, "compression_loss": 0.0, "distillation_loss": 0.5845510959625244, "epoch": 0.26, "learning_rate": 4.982847592546499e-05, "loss": 0.564, "step": 278, "task_loss": 0.3790714740753174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7095771601068609, "compression_loss": 0.0, "distillation_loss": 0.2517128884792328, "epoch": 0.26, "learning_rate": 4.982722765099189e-05, "loss": 0.2349, "step": 279, "task_loss": 0.08311676234006882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7096104568439363, "compression_loss": 0.0, "distillation_loss": 0.4391753077507019, "epoch": 0.27, "learning_rate": 4.982597486653332e-05, "loss": 0.4248, "step": 280, "task_loss": 0.29559123516082764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7096437454060213, "compression_loss": 0.0, "distillation_loss": 0.5961909294128418, "epoch": 0.27, "learning_rate": 4.982471757231685e-05, "loss": 0.5709, "step": 281, "task_loss": 0.3435242772102356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7096770257941192, "compression_loss": 0.0, "distillation_loss": 0.3963918089866638, "epoch": 0.27, "learning_rate": 4.982345576857087e-05, "loss": 0.3802, "step": 282, "task_loss": 0.23463018238544464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7097102980092338, "compression_loss": 0.0, "distillation_loss": 0.40490058064460754, "epoch": 0.27, "learning_rate": 4.9822189455524604e-05, "loss": 0.3843, "step": 283, "task_loss": 0.1989540457725525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7097435620523688, "compression_loss": 0.0, "distillation_loss": 0.40912073850631714, "epoch": 0.27, "learning_rate": 4.982091863340808e-05, "loss": 0.3978, "step": 284, "task_loss": 0.29543089866638184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7097768179245278, "compression_loss": 0.0, "distillation_loss": 0.5951185822486877, "epoch": 0.27, "learning_rate": 4.9819643302452146e-05, "loss": 0.5699, "step": 285, "task_loss": 0.3433946371078491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7098100656267148, "compression_loss": 0.0, "distillation_loss": 0.45149531960487366, "epoch": 0.27, "learning_rate": 4.981836346288847e-05, "loss": 0.4407, "step": 286, "task_loss": 0.3439605236053467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7098433051599331, "compression_loss": 0.0, "distillation_loss": 0.9340552687644958, "epoch": 0.27, "learning_rate": 4.981707911494955e-05, "loss": 0.8901, "step": 287, "task_loss": 0.4947472810745239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7098765365251865, "compression_loss": 0.0, "distillation_loss": 0.40463870763778687, "epoch": 0.27, "learning_rate": 4.981579025886868e-05, "loss": 0.379, "step": 288, "task_loss": 0.14829692244529724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7099097597234787, "compression_loss": 0.0, "distillation_loss": 0.14002841711044312, "epoch": 0.27, "learning_rate": 4.981449689488e-05, "loss": 0.1338, "step": 289, "task_loss": 0.07818258553743362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7099429747558135, "compression_loss": 0.0, "distillation_loss": 0.45351576805114746, "epoch": 0.28, "learning_rate": 4.981319902321846e-05, "loss": 0.4276, "step": 290, "task_loss": 0.1947600394487381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7099761816231944, "compression_loss": 0.0, "distillation_loss": 0.4904792308807373, "epoch": 0.28, "learning_rate": 4.981189664411981e-05, "loss": 0.4701, "step": 291, "task_loss": 0.28627243638038635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7100093803266252, "compression_loss": 0.0, "distillation_loss": 0.6285742521286011, "epoch": 0.28, "learning_rate": 4.981058975782063e-05, "loss": 0.5967, "step": 292, "task_loss": 0.31022369861602783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7100425708671094, "compression_loss": 0.0, "distillation_loss": 0.7875237464904785, "epoch": 0.28, "learning_rate": 4.9809278364558336e-05, "loss": 0.7486, "step": 293, "task_loss": 0.39871400594711304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.710075753245651, "compression_loss": 0.0, "distillation_loss": 0.5977748036384583, "epoch": 0.28, "learning_rate": 4.980796246457115e-05, "loss": 0.5728, "step": 294, "task_loss": 0.3479851484298706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7101089274632534, "compression_loss": 0.0, "distillation_loss": 0.5126686096191406, "epoch": 0.28, "learning_rate": 4.9806642058098105e-05, "loss": 0.4815, "step": 295, "task_loss": 0.20053212344646454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7101420935209204, "compression_loss": 0.0, "distillation_loss": 0.5196264982223511, "epoch": 0.28, "learning_rate": 4.980531714537905e-05, "loss": 0.4878, "step": 296, "task_loss": 0.2012956738471985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7101752514196557, "compression_loss": 0.0, "distillation_loss": 0.33446091413497925, "epoch": 0.28, "learning_rate": 4.980398772665468e-05, "loss": 0.3161, "step": 297, "task_loss": 0.15055128931999207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7102084011604629, "compression_loss": 0.0, "distillation_loss": 0.6768823862075806, "epoch": 0.28, "learning_rate": 4.980265380216649e-05, "loss": 0.6398, "step": 298, "task_loss": 0.30652397871017456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7102415427443457, "compression_loss": 0.0, "distillation_loss": 0.3801252245903015, "epoch": 0.28, "learning_rate": 4.9801315372156775e-05, "loss": 0.3637, "step": 299, "task_loss": 0.21593311429023743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7102746761723079, "compression_loss": 0.0, "distillation_loss": 0.3485510051250458, "epoch": 0.28, "learning_rate": 4.979997243686868e-05, "loss": 0.3279, "step": 300, "task_loss": 0.14158813655376434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7103078014453531, "compression_loss": 0.0, "distillation_loss": 0.4838918447494507, "epoch": 0.29, "learning_rate": 4.979862499654615e-05, "loss": 0.4596, "step": 301, "task_loss": 0.24049346148967743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7103409185644849, "compression_loss": 0.0, "distillation_loss": 0.5298963785171509, "epoch": 0.29, "learning_rate": 4.9797273051433966e-05, "loss": 0.4982, "step": 302, "task_loss": 0.21265023946762085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7103740275307071, "compression_loss": 0.0, "distillation_loss": 0.1947401762008667, "epoch": 0.29, "learning_rate": 4.97959166017777e-05, "loss": 0.1844, "step": 303, "task_loss": 0.0909617617726326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7104071283450233, "compression_loss": 0.0, "distillation_loss": 0.3400229215621948, "epoch": 0.29, "learning_rate": 4.979455564782377e-05, "loss": 0.3316, "step": 304, "task_loss": 0.2561742961406708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7104402210084373, "compression_loss": 0.0, "distillation_loss": 0.33108600974082947, "epoch": 0.29, "learning_rate": 4.9793190189819395e-05, "loss": 0.3223, "step": 305, "task_loss": 0.24314963817596436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7104733055219526, "compression_loss": 0.0, "distillation_loss": 0.270094633102417, "epoch": 0.29, "learning_rate": 4.979182022801262e-05, "loss": 0.2599, "step": 306, "task_loss": 0.1684069037437439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7105063818865731, "compression_loss": 0.0, "distillation_loss": 0.3968997001647949, "epoch": 0.29, "learning_rate": 4.979044576265229e-05, "loss": 0.3775, "step": 307, "task_loss": 0.203133687376976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7105394501033022, "compression_loss": 0.0, "distillation_loss": 0.4618909955024719, "epoch": 0.29, "learning_rate": 4.9789066793988106e-05, "loss": 0.4387, "step": 308, "task_loss": 0.23047326505184174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7105725101731439, "compression_loss": 0.0, "distillation_loss": 0.4338553547859192, "epoch": 0.29, "learning_rate": 4.978768332227054e-05, "loss": 0.4193, "step": 309, "task_loss": 0.2878293991088867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7106055620971018, "compression_loss": 0.0, "distillation_loss": 0.6526356935501099, "epoch": 0.29, "learning_rate": 4.9786295347750936e-05, "loss": 0.6164, "step": 310, "task_loss": 0.29025229811668396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7106386058761793, "compression_loss": 0.0, "distillation_loss": 0.7481920123100281, "epoch": 0.3, "learning_rate": 4.9784902870681406e-05, "loss": 0.7157, "step": 311, "task_loss": 0.4227951765060425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7106716415113805, "compression_loss": 0.0, "distillation_loss": 0.679728627204895, "epoch": 0.3, "learning_rate": 4.97835058913149e-05, "loss": 0.6661, "step": 312, "task_loss": 0.5437166690826416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7107046690037088, "compression_loss": 0.0, "distillation_loss": 0.6855102777481079, "epoch": 0.3, "learning_rate": 4.9782104409905186e-05, "loss": 0.6504, "step": 313, "task_loss": 0.33481907844543457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.710737688354168, "compression_loss": 0.0, "distillation_loss": 0.4069738984107971, "epoch": 0.3, "learning_rate": 4.9780698426706864e-05, "loss": 0.3808, "step": 314, "task_loss": 0.14478835463523865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7107706995637616, "compression_loss": 0.0, "distillation_loss": 0.6615716218948364, "epoch": 0.3, "learning_rate": 4.977928794197532e-05, "loss": 0.6309, "step": 315, "task_loss": 0.3549352288246155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7108037026334935, "compression_loss": 0.0, "distillation_loss": 0.5655452609062195, "epoch": 0.3, "learning_rate": 4.9777872955966785e-05, "loss": 0.5316, "step": 316, "task_loss": 0.22563889622688293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7108366975643673, "compression_loss": 0.0, "distillation_loss": 0.3800777792930603, "epoch": 0.3, "learning_rate": 4.97764534689383e-05, "loss": 0.3621, "step": 317, "task_loss": 0.20060645043849945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7108696843573867, "compression_loss": 0.0, "distillation_loss": 0.2623631954193115, "epoch": 0.3, "learning_rate": 4.977502948114772e-05, "loss": 0.2563, "step": 318, "task_loss": 0.20183011889457703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7109026630135554, "compression_loss": 0.0, "distillation_loss": 0.4012034833431244, "epoch": 0.3, "learning_rate": 4.977360099285371e-05, "loss": 0.3783, "step": 319, "task_loss": 0.17202433943748474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7109356335338771, "compression_loss": 0.0, "distillation_loss": 0.35331350564956665, "epoch": 0.3, "learning_rate": 4.9772168004315765e-05, "loss": 0.3308, "step": 320, "task_loss": 0.12813310325145721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7109685959193554, "compression_loss": 0.0, "distillation_loss": 0.3918440043926239, "epoch": 0.3, "learning_rate": 4.9770730515794204e-05, "loss": 0.3696, "step": 321, "task_loss": 0.16927433013916016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.711001550170994, "compression_loss": 0.0, "distillation_loss": 0.3784354627132416, "epoch": 0.31, "learning_rate": 4.976928852755015e-05, "loss": 0.3608, "step": 322, "task_loss": 0.2020285427570343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7110344962897965, "compression_loss": 0.0, "distillation_loss": 0.5241067409515381, "epoch": 0.31, "learning_rate": 4.976784203984554e-05, "loss": 0.501, "step": 323, "task_loss": 0.2931115925312042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7110674342767669, "compression_loss": 0.0, "distillation_loss": 0.3190663456916809, "epoch": 0.31, "learning_rate": 4.976639105294314e-05, "loss": 0.308, "step": 324, "task_loss": 0.2081044614315033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7111003641329084, "compression_loss": 0.0, "distillation_loss": 0.3578122854232788, "epoch": 0.31, "learning_rate": 4.976493556710653e-05, "loss": 0.3369, "step": 325, "task_loss": 0.14846715331077576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7111332858592252, "compression_loss": 0.0, "distillation_loss": 0.4352418780326843, "epoch": 0.31, "learning_rate": 4.976347558260011e-05, "loss": 0.4134, "step": 326, "task_loss": 0.21667122840881348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7111661994567205, "compression_loss": 0.0, "distillation_loss": 0.36462095379829407, "epoch": 0.31, "learning_rate": 4.976201109968908e-05, "loss": 0.3506, "step": 327, "task_loss": 0.2242008000612259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7111991049263984, "compression_loss": 0.0, "distillation_loss": 0.5992603302001953, "epoch": 0.31, "learning_rate": 4.976054211863949e-05, "loss": 0.5782, "step": 328, "task_loss": 0.38846805691719055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7112320022692623, "compression_loss": 0.0, "distillation_loss": 0.1498991847038269, "epoch": 0.31, "learning_rate": 4.9759068639718166e-05, "loss": 0.1485, "step": 329, "task_loss": 0.13616728782653809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.711264891486316, "compression_loss": 0.0, "distillation_loss": 0.7137056589126587, "epoch": 0.31, "learning_rate": 4.975759066319278e-05, "loss": 0.6852, "step": 330, "task_loss": 0.4287063479423523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7112977725785632, "compression_loss": 0.0, "distillation_loss": 0.3997446298599243, "epoch": 0.31, "learning_rate": 4.9756108189331825e-05, "loss": 0.385, "step": 331, "task_loss": 0.2526022791862488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7113306455470074, "compression_loss": 0.0, "distillation_loss": 0.6099421977996826, "epoch": 0.32, "learning_rate": 4.975462121840458e-05, "loss": 0.5846, "step": 332, "task_loss": 0.35605889558792114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7113635103926526, "compression_loss": 0.0, "distillation_loss": 0.3887147605419159, "epoch": 0.32, "learning_rate": 4.975312975068118e-05, "loss": 0.3771, "step": 333, "task_loss": 0.27239084243774414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7113963671165022, "compression_loss": 0.0, "distillation_loss": 0.3457412123680115, "epoch": 0.32, "learning_rate": 4.975163378643255e-05, "loss": 0.3356, "step": 334, "task_loss": 0.2438831776380539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7114292157195601, "compression_loss": 0.0, "distillation_loss": 0.26972734928131104, "epoch": 0.32, "learning_rate": 4.975013332593044e-05, "loss": 0.2563, "step": 335, "task_loss": 0.13514116406440735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7114620562028297, "compression_loss": 0.0, "distillation_loss": 0.269406259059906, "epoch": 0.32, "learning_rate": 4.97486283694474e-05, "loss": 0.2526, "step": 336, "task_loss": 0.10177846252918243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7114948885673149, "compression_loss": 0.0, "distillation_loss": 0.5931138396263123, "epoch": 0.32, "learning_rate": 4.974711891725684e-05, "loss": 0.5679, "step": 337, "task_loss": 0.34124326705932617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7115277128140194, "compression_loss": 0.0, "distillation_loss": 0.12281601130962372, "epoch": 0.32, "learning_rate": 4.9745604969632934e-05, "loss": 0.1292, "step": 338, "task_loss": 0.18707461655139923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7115605289439467, "compression_loss": 0.0, "distillation_loss": 0.6009297966957092, "epoch": 0.32, "learning_rate": 4.974408652685072e-05, "loss": 0.5704, "step": 339, "task_loss": 0.296112984418869 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7115933369581007, "compression_loss": 0.0, "distillation_loss": 0.6147419214248657, "epoch": 0.32, "learning_rate": 4.974256358918601e-05, "loss": 0.586, "step": 340, "task_loss": 0.3271123766899109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7116261368574849, "compression_loss": 0.0, "distillation_loss": 0.5920723080635071, "epoch": 0.32, "learning_rate": 4.9741036156915464e-05, "loss": 0.565, "step": 341, "task_loss": 0.32182377576828003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7116589286431031, "compression_loss": 0.0, "distillation_loss": 0.2902904450893402, "epoch": 0.32, "learning_rate": 4.973950423031655e-05, "loss": 0.2809, "step": 342, "task_loss": 0.19625771045684814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.711691712315959, "compression_loss": 0.0, "distillation_loss": 0.674541711807251, "epoch": 0.33, "learning_rate": 4.9737967809667546e-05, "loss": 0.6477, "step": 343, "task_loss": 0.4057275056838989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7117244878770561, "compression_loss": 0.0, "distillation_loss": 0.18200403451919556, "epoch": 0.33, "learning_rate": 4.9736426895247545e-05, "loss": 0.1747, "step": 344, "task_loss": 0.10881594568490982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7117572553273983, "compression_loss": 0.0, "distillation_loss": 0.5339623093605042, "epoch": 0.33, "learning_rate": 4.973488148733647e-05, "loss": 0.5104, "step": 345, "task_loss": 0.29824596643447876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7117900146679892, "compression_loss": 0.0, "distillation_loss": 0.5717617273330688, "epoch": 0.33, "learning_rate": 4.973333158621505e-05, "loss": 0.553, "step": 346, "task_loss": 0.3842325806617737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7118227658998324, "compression_loss": 0.0, "distillation_loss": 0.4612230062484741, "epoch": 0.33, "learning_rate": 4.973177719216483e-05, "loss": 0.4341, "step": 347, "task_loss": 0.1896570473909378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7118555090239316, "compression_loss": 0.0, "distillation_loss": 0.383368581533432, "epoch": 0.33, "learning_rate": 4.973021830546817e-05, "loss": 0.3685, "step": 348, "task_loss": 0.23502130806446075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7118882440412907, "compression_loss": 0.0, "distillation_loss": 0.33486291766166687, "epoch": 0.33, "learning_rate": 4.972865492640826e-05, "loss": 0.3254, "step": 349, "task_loss": 0.24004624783992767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7119209709529131, "compression_loss": 0.0, "distillation_loss": 0.37039846181869507, "epoch": 0.33, "learning_rate": 4.972708705526908e-05, "loss": 0.3512, "step": 350, "task_loss": 0.1785244345664978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7119536897598027, "compression_loss": 0.0, "distillation_loss": 0.4161568284034729, "epoch": 0.33, "learning_rate": 4.972551469233545e-05, "loss": 0.3904, "step": 351, "task_loss": 0.15905624628067017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7119864004629629, "compression_loss": 0.0, "distillation_loss": 0.4145691394805908, "epoch": 0.33, "learning_rate": 4.9723937837892996e-05, "loss": 0.3912, "step": 352, "task_loss": 0.18065348267555237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7120191030633978, "compression_loss": 0.0, "distillation_loss": 0.262037992477417, "epoch": 0.34, "learning_rate": 4.972235649222817e-05, "loss": 0.2493, "step": 353, "task_loss": 0.13479451835155487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7120517975621107, "compression_loss": 0.0, "distillation_loss": 0.3872963786125183, "epoch": 0.34, "learning_rate": 4.972077065562821e-05, "loss": 0.3693, "step": 354, "task_loss": 0.2070463001728058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7120844839601054, "compression_loss": 0.0, "distillation_loss": 0.48908573389053345, "epoch": 0.34, "learning_rate": 4.971918032838122e-05, "loss": 0.4654, "step": 355, "task_loss": 0.2519568204879761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7121171622583856, "compression_loss": 0.0, "distillation_loss": 0.15656299889087677, "epoch": 0.34, "learning_rate": 4.9717585510776065e-05, "loss": 0.1556, "step": 356, "task_loss": 0.1469275802373886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.712149832457955, "compression_loss": 0.0, "distillation_loss": 0.42582622170448303, "epoch": 0.34, "learning_rate": 4.971598620310246e-05, "loss": 0.4109, "step": 357, "task_loss": 0.27654868364334106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7121824945598174, "compression_loss": 0.0, "distillation_loss": 0.4828101396560669, "epoch": 0.34, "learning_rate": 4.9714382405650926e-05, "loss": 0.4592, "step": 358, "task_loss": 0.24672147631645203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7122151485649763, "compression_loss": 0.0, "distillation_loss": 0.2761083245277405, "epoch": 0.34, "learning_rate": 4.971277411871281e-05, "loss": 0.266, "step": 359, "task_loss": 0.17460951209068298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7122477944744354, "compression_loss": 0.0, "distillation_loss": 0.5871754884719849, "epoch": 0.34, "learning_rate": 4.971116134258025e-05, "loss": 0.5566, "step": 360, "task_loss": 0.28161656856536865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7122804322891985, "compression_loss": 0.0, "distillation_loss": 0.4658134877681732, "epoch": 0.34, "learning_rate": 4.9709544077546235e-05, "loss": 0.4412, "step": 361, "task_loss": 0.21951305866241455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7123130620102691, "compression_loss": 0.0, "distillation_loss": 0.4061258137226105, "epoch": 0.34, "learning_rate": 4.9707922323904524e-05, "loss": 0.3812, "step": 362, "task_loss": 0.1569288671016693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.712345683638651, "compression_loss": 0.0, "distillation_loss": 0.4409645199775696, "epoch": 0.34, "learning_rate": 4.9706296081949724e-05, "loss": 0.4208, "step": 363, "task_loss": 0.23960046470165253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7123782971753478, "compression_loss": 0.0, "distillation_loss": 0.13879182934761047, "epoch": 0.35, "learning_rate": 4.9704665351977266e-05, "loss": 0.1371, "step": 364, "task_loss": 0.12139101326465607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7124109026213634, "compression_loss": 0.0, "distillation_loss": 0.5069670677185059, "epoch": 0.35, "learning_rate": 4.9703030134283356e-05, "loss": 0.4817, "step": 365, "task_loss": 0.25386273860931396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7124434999777013, "compression_loss": 0.0, "distillation_loss": 0.46004921197891235, "epoch": 0.35, "learning_rate": 4.970139042916506e-05, "loss": 0.4276, "step": 366, "task_loss": 0.13600590825080872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7124760892453651, "compression_loss": 0.0, "distillation_loss": 0.47816845774650574, "epoch": 0.35, "learning_rate": 4.969974623692023e-05, "loss": 0.4559, "step": 367, "task_loss": 0.2554030418395996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7125086704253587, "compression_loss": 0.0, "distillation_loss": 0.5051754117012024, "epoch": 0.35, "learning_rate": 4.969809755784753e-05, "loss": 0.4801, "step": 368, "task_loss": 0.2545245885848999 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7125412435186856, "compression_loss": 0.0, "distillation_loss": 0.5262600183486938, "epoch": 0.35, "learning_rate": 4.969644439224647e-05, "loss": 0.5022, "step": 369, "task_loss": 0.28538602590560913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7125738085263497, "compression_loss": 0.0, "distillation_loss": 0.32377320528030396, "epoch": 0.35, "learning_rate": 4.969478674041735e-05, "loss": 0.3042, "step": 370, "task_loss": 0.12757205963134766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7126063654493544, "compression_loss": 0.0, "distillation_loss": 0.31874191761016846, "epoch": 0.35, "learning_rate": 4.969312460266128e-05, "loss": 0.3064, "step": 371, "task_loss": 0.19505202770233154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7126389142887035, "compression_loss": 0.0, "distillation_loss": 0.3652954697608948, "epoch": 0.35, "learning_rate": 4.969145797928021e-05, "loss": 0.3444, "step": 372, "task_loss": 0.156343013048172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7126714550454009, "compression_loss": 0.0, "distillation_loss": 0.19748246669769287, "epoch": 0.35, "learning_rate": 4.968978687057687e-05, "loss": 0.1868, "step": 373, "task_loss": 0.09101182222366333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7127039877204498, "compression_loss": 0.0, "distillation_loss": 0.5082756876945496, "epoch": 0.36, "learning_rate": 4.9688111276854846e-05, "loss": 0.4828, "step": 374, "task_loss": 0.2530589699745178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7127365123148545, "compression_loss": 0.0, "distillation_loss": 0.48118260502815247, "epoch": 0.36, "learning_rate": 4.9686431198418515e-05, "loss": 0.4566, "step": 375, "task_loss": 0.23539604246616364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7127690288296181, "compression_loss": 0.0, "distillation_loss": 0.43502384424209595, "epoch": 0.36, "learning_rate": 4.968474663557306e-05, "loss": 0.4128, "step": 376, "task_loss": 0.21273593604564667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7128015372657446, "compression_loss": 0.0, "distillation_loss": 0.4908156096935272, "epoch": 0.36, "learning_rate": 4.9683057588624494e-05, "loss": 0.4682, "step": 377, "task_loss": 0.26497960090637207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7128340376242377, "compression_loss": 0.0, "distillation_loss": 0.5738304853439331, "epoch": 0.36, "learning_rate": 4.968136405787964e-05, "loss": 0.5737, "step": 378, "task_loss": 0.5724409818649292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7128665299061009, "compression_loss": 0.0, "distillation_loss": 0.31743335723876953, "epoch": 0.36, "learning_rate": 4.967966604364614e-05, "loss": 0.3031, "step": 379, "task_loss": 0.17429381608963013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.712899014112338, "compression_loss": 0.0, "distillation_loss": 0.6811038851737976, "epoch": 0.36, "learning_rate": 4.9677963546232445e-05, "loss": 0.651, "step": 380, "task_loss": 0.3800850212574005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7129314902439526, "compression_loss": 0.0, "distillation_loss": 0.5749964714050293, "epoch": 0.36, "learning_rate": 4.967625656594782e-05, "loss": 0.5545, "step": 381, "task_loss": 0.3700599670410156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7129639583019486, "compression_loss": 0.0, "distillation_loss": 0.5684269666671753, "epoch": 0.36, "learning_rate": 4.967454510310235e-05, "loss": 0.5486, "step": 382, "task_loss": 0.3700346052646637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7129964182873294, "compression_loss": 0.0, "distillation_loss": 0.42811310291290283, "epoch": 0.36, "learning_rate": 4.967282915800693e-05, "loss": 0.4135, "step": 383, "task_loss": 0.2819935083389282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7130288702010988, "compression_loss": 0.0, "distillation_loss": 0.5204276442527771, "epoch": 0.36, "learning_rate": 4.9671108730973274e-05, "loss": 0.4932, "step": 384, "task_loss": 0.24831140041351318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7130613140442605, "compression_loss": 0.0, "distillation_loss": 0.33399271965026855, "epoch": 0.37, "learning_rate": 4.9669383822313886e-05, "loss": 0.3222, "step": 385, "task_loss": 0.21643346548080444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7130937498178183, "compression_loss": 0.0, "distillation_loss": 0.318186491727829, "epoch": 0.37, "learning_rate": 4.966765443234212e-05, "loss": 0.3105, "step": 386, "task_loss": 0.24149882793426514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7131261775227756, "compression_loss": 0.0, "distillation_loss": 0.20772957801818848, "epoch": 0.37, "learning_rate": 4.966592056137213e-05, "loss": 0.2072, "step": 387, "task_loss": 0.20199303328990936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7131585971601364, "compression_loss": 0.0, "distillation_loss": 0.29850098490715027, "epoch": 0.37, "learning_rate": 4.966418220971888e-05, "loss": 0.2825, "step": 388, "task_loss": 0.13806317746639252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.713191008730904, "compression_loss": 0.0, "distillation_loss": 0.555107831954956, "epoch": 0.37, "learning_rate": 4.9662439377698145e-05, "loss": 0.5276, "step": 389, "task_loss": 0.2797929644584656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7132234122360824, "compression_loss": 0.0, "distillation_loss": 0.31871169805526733, "epoch": 0.37, "learning_rate": 4.9660692065626515e-05, "loss": 0.3048, "step": 390, "task_loss": 0.17996791005134583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7132558076766753, "compression_loss": 0.0, "distillation_loss": 0.34103503823280334, "epoch": 0.37, "learning_rate": 4.965894027382141e-05, "loss": 0.3362, "step": 391, "task_loss": 0.29244738817214966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7132881950536861, "compression_loss": 0.0, "distillation_loss": 0.42130306363105774, "epoch": 0.37, "learning_rate": 4.965718400260105e-05, "loss": 0.4119, "step": 392, "task_loss": 0.32684704661369324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7133205743681188, "compression_loss": 0.0, "distillation_loss": 0.252642959356308, "epoch": 0.37, "learning_rate": 4.965542325228446e-05, "loss": 0.2442, "step": 393, "task_loss": 0.16772150993347168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7133529456209768, "compression_loss": 0.0, "distillation_loss": 0.18364742398262024, "epoch": 0.37, "learning_rate": 4.96536580231915e-05, "loss": 0.172, "step": 394, "task_loss": 0.06767023354768753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7133853088132639, "compression_loss": 0.0, "distillation_loss": 0.40755707025527954, "epoch": 0.38, "learning_rate": 4.9651888315642815e-05, "loss": 0.3952, "step": 395, "task_loss": 0.2839694619178772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7134176639459838, "compression_loss": 0.0, "distillation_loss": 0.5597716569900513, "epoch": 0.38, "learning_rate": 4.96501141299599e-05, "loss": 0.5393, "step": 396, "task_loss": 0.35467255115509033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7134500110201403, "compression_loss": 0.0, "distillation_loss": 0.44882553815841675, "epoch": 0.38, "learning_rate": 4.9648335466465035e-05, "loss": 0.4253, "step": 397, "task_loss": 0.21373212337493896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7134823500367369, "compression_loss": 0.0, "distillation_loss": 0.562800407409668, "epoch": 0.38, "learning_rate": 4.964655232548133e-05, "loss": 0.5389, "step": 398, "task_loss": 0.3235562741756439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7135146809967773, "compression_loss": 0.0, "distillation_loss": 0.43563634157180786, "epoch": 0.38, "learning_rate": 4.964476470733269e-05, "loss": 0.4108, "step": 399, "task_loss": 0.18746916949748993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7135470039012652, "compression_loss": 0.0, "distillation_loss": 0.5361025333404541, "epoch": 0.38, "learning_rate": 4.964297261234385e-05, "loss": 0.5201, "step": 400, "task_loss": 0.3763747215270996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7135793187512043, "compression_loss": 0.0, "distillation_loss": 0.40355557203292847, "epoch": 0.38, "learning_rate": 4.964117604084036e-05, "loss": 0.3828, "step": 401, "task_loss": 0.19647303223609924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7136116255475984, "compression_loss": 0.0, "distillation_loss": 0.4803870916366577, "epoch": 0.38, "learning_rate": 4.963937499314857e-05, "loss": 0.4581, "step": 402, "task_loss": 0.25726521015167236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.713643924291451, "compression_loss": 0.0, "distillation_loss": 0.6330090761184692, "epoch": 0.38, "learning_rate": 4.963756946959564e-05, "loss": 0.6021, "step": 403, "task_loss": 0.3235839009284973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7136762149837658, "compression_loss": 0.0, "distillation_loss": 0.40180158615112305, "epoch": 0.38, "learning_rate": 4.9635759470509554e-05, "loss": 0.3963, "step": 404, "task_loss": 0.3472459614276886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7137084976255466, "compression_loss": 0.0, "distillation_loss": 0.29341554641723633, "epoch": 0.38, "learning_rate": 4.9633944996219125e-05, "loss": 0.2771, "step": 405, "task_loss": 0.12987171113491058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.713740772217797, "compression_loss": 0.0, "distillation_loss": 0.23439250886440277, "epoch": 0.39, "learning_rate": 4.9632126047053954e-05, "loss": 0.2184, "step": 406, "task_loss": 0.07485216110944748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7137730387615207, "compression_loss": 0.0, "distillation_loss": 0.2850918769836426, "epoch": 0.39, "learning_rate": 4.963030262334445e-05, "loss": 0.2713, "step": 407, "task_loss": 0.14699222147464752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7138052972577213, "compression_loss": 0.0, "distillation_loss": 0.392857164144516, "epoch": 0.39, "learning_rate": 4.962847472542185e-05, "loss": 0.3697, "step": 408, "task_loss": 0.16155928373336792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7138375477074026, "compression_loss": 0.0, "distillation_loss": 0.3292420208454132, "epoch": 0.39, "learning_rate": 4.96266423536182e-05, "loss": 0.3101, "step": 409, "task_loss": 0.13783293962478638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7138697901115683, "compression_loss": 0.0, "distillation_loss": 0.5401057600975037, "epoch": 0.39, "learning_rate": 4.9624805508266375e-05, "loss": 0.5129, "step": 410, "task_loss": 0.26759254932403564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.713902024471222, "compression_loss": 0.0, "distillation_loss": 0.4787713289260864, "epoch": 0.39, "learning_rate": 4.9622964189700026e-05, "loss": 0.4574, "step": 411, "task_loss": 0.2650720477104187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7139342507873674, "compression_loss": 0.0, "distillation_loss": 0.3292551040649414, "epoch": 0.39, "learning_rate": 4.962111839825365e-05, "loss": 0.3079, "step": 412, "task_loss": 0.11532945930957794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7139664690610081, "compression_loss": 0.0, "distillation_loss": 0.32782095670700073, "epoch": 0.39, "learning_rate": 4.961926813426254e-05, "loss": 0.3213, "step": 413, "task_loss": 0.2627405524253845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.713998679293148, "compression_loss": 0.0, "distillation_loss": 0.4790850579738617, "epoch": 0.39, "learning_rate": 4.9617413398062814e-05, "loss": 0.4547, "step": 414, "task_loss": 0.23543286323547363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7140308814847907, "compression_loss": 0.0, "distillation_loss": 0.2713318467140198, "epoch": 0.39, "learning_rate": 4.9615554189991374e-05, "loss": 0.2603, "step": 415, "task_loss": 0.16122034192085266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7140630756369397, "compression_loss": 0.0, "distillation_loss": 0.32149723172187805, "epoch": 0.4, "learning_rate": 4.9613690510385965e-05, "loss": 0.303, "step": 416, "task_loss": 0.13658498227596283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7140952617505988, "compression_loss": 0.0, "distillation_loss": 0.22573064267635345, "epoch": 0.4, "learning_rate": 4.961182235958515e-05, "loss": 0.2124, "step": 417, "task_loss": 0.09292272478342056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7141274398267717, "compression_loss": 0.0, "distillation_loss": 0.7057199478149414, "epoch": 0.4, "learning_rate": 4.9609949737928254e-05, "loss": 0.6774, "step": 418, "task_loss": 0.42263519763946533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7141596098664622, "compression_loss": 0.0, "distillation_loss": 0.2580406963825226, "epoch": 0.4, "learning_rate": 4.9608072645755476e-05, "loss": 0.2494, "step": 419, "task_loss": 0.17207808792591095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7141917718706738, "compression_loss": 0.0, "distillation_loss": 0.297884464263916, "epoch": 0.4, "learning_rate": 4.960619108340778e-05, "loss": 0.2873, "step": 420, "task_loss": 0.19168856739997864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7142239258404103, "compression_loss": 0.0, "distillation_loss": 0.10459847003221512, "epoch": 0.4, "learning_rate": 4.9604305051226976e-05, "loss": 0.0963, "step": 421, "task_loss": 0.0215504951775074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7142560717766753, "compression_loss": 0.0, "distillation_loss": 0.2625170648097992, "epoch": 0.4, "learning_rate": 4.960241454955566e-05, "loss": 0.2528, "step": 422, "task_loss": 0.1657557189464569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7142882096804724, "compression_loss": 0.0, "distillation_loss": 0.49204713106155396, "epoch": 0.4, "learning_rate": 4.960051957873725e-05, "loss": 0.4776, "step": 423, "task_loss": 0.34725382924079895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7143203395528055, "compression_loss": 0.0, "distillation_loss": 0.20165809988975525, "epoch": 0.4, "learning_rate": 4.959862013911599e-05, "loss": 0.1876, "step": 424, "task_loss": 0.06141514703631401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7143524613946781, "compression_loss": 0.0, "distillation_loss": 0.24638772010803223, "epoch": 0.4, "learning_rate": 4.959671623103691e-05, "loss": 0.2331, "step": 425, "task_loss": 0.113340824842453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7143845752070941, "compression_loss": 0.0, "distillation_loss": 0.7320840358734131, "epoch": 0.4, "learning_rate": 4.959480785484587e-05, "loss": 0.7126, "step": 426, "task_loss": 0.53769451379776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7144166809910568, "compression_loss": 0.0, "distillation_loss": 0.3415003716945648, "epoch": 0.41, "learning_rate": 4.959289501088953e-05, "loss": 0.3285, "step": 427, "task_loss": 0.21169662475585938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7144487787475703, "compression_loss": 0.0, "distillation_loss": 0.3320407271385193, "epoch": 0.41, "learning_rate": 4.9590977699515374e-05, "loss": 0.3184, "step": 428, "task_loss": 0.19590842723846436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7144808684776381, "compression_loss": 0.0, "distillation_loss": 0.11836206912994385, "epoch": 0.41, "learning_rate": 4.958905592107168e-05, "loss": 0.1116, "step": 429, "task_loss": 0.05044740438461304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7145129501822638, "compression_loss": 0.0, "distillation_loss": 0.6588773727416992, "epoch": 0.41, "learning_rate": 4.958712967590756e-05, "loss": 0.6266, "step": 430, "task_loss": 0.3364851772785187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7145450238624512, "compression_loss": 0.0, "distillation_loss": 0.3294253945350647, "epoch": 0.41, "learning_rate": 4.9585198964372925e-05, "loss": 0.3151, "step": 431, "task_loss": 0.1865476369857788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7145770895192041, "compression_loss": 0.0, "distillation_loss": 0.41862189769744873, "epoch": 0.41, "learning_rate": 4.958326378681849e-05, "loss": 0.3942, "step": 432, "task_loss": 0.17390699684619904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7146091471535259, "compression_loss": 0.0, "distillation_loss": 0.6988288164138794, "epoch": 0.41, "learning_rate": 4.958132414359579e-05, "loss": 0.6708, "step": 433, "task_loss": 0.41860806941986084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7146411967664204, "compression_loss": 0.0, "distillation_loss": 0.5082677006721497, "epoch": 0.41, "learning_rate": 4.957938003505718e-05, "loss": 0.4901, "step": 434, "task_loss": 0.3265720009803772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7146732383588912, "compression_loss": 0.0, "distillation_loss": 0.43779420852661133, "epoch": 0.41, "learning_rate": 4.957743146155581e-05, "loss": 0.4178, "step": 435, "task_loss": 0.23794850707054138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7147052719319422, "compression_loss": 0.0, "distillation_loss": 0.33255505561828613, "epoch": 0.41, "learning_rate": 4.9575478423445655e-05, "loss": 0.3063, "step": 436, "task_loss": 0.07019799202680588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.714737297486577, "compression_loss": 0.0, "distillation_loss": 0.18722805380821228, "epoch": 0.42, "learning_rate": 4.957352092108148e-05, "loss": 0.1767, "step": 437, "task_loss": 0.08177616447210312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7147693150237991, "compression_loss": 0.0, "distillation_loss": 0.44641682505607605, "epoch": 0.42, "learning_rate": 4.957155895481889e-05, "loss": 0.4228, "step": 438, "task_loss": 0.2099793255329132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7148013245446124, "compression_loss": 0.0, "distillation_loss": 0.6220138072967529, "epoch": 0.42, "learning_rate": 4.956959252501426e-05, "loss": 0.5978, "step": 439, "task_loss": 0.3797354996204376 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7148333260500205, "compression_loss": 0.0, "distillation_loss": 0.37998977303504944, "epoch": 0.42, "learning_rate": 4.956762163202484e-05, "loss": 0.3565, "step": 440, "task_loss": 0.1454060971736908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7148653195410271, "compression_loss": 0.0, "distillation_loss": 0.5865918397903442, "epoch": 0.42, "learning_rate": 4.956564627620862e-05, "loss": 0.5546, "step": 441, "task_loss": 0.2664667069911957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7148973050186359, "compression_loss": 0.0, "distillation_loss": 0.36456751823425293, "epoch": 0.42, "learning_rate": 4.956366645792445e-05, "loss": 0.3419, "step": 442, "task_loss": 0.1381196826696396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7149292824838505, "compression_loss": 0.0, "distillation_loss": 0.3425045609474182, "epoch": 0.42, "learning_rate": 4.956168217753197e-05, "loss": 0.3388, "step": 443, "task_loss": 0.3051426112651825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7149612519376747, "compression_loss": 0.0, "distillation_loss": 0.28074485063552856, "epoch": 0.42, "learning_rate": 4.955969343539162e-05, "loss": 0.2671, "step": 444, "task_loss": 0.14405837655067444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.714993213381112, "compression_loss": 0.0, "distillation_loss": 0.16746632754802704, "epoch": 0.42, "learning_rate": 4.955770023186469e-05, "loss": 0.1645, "step": 445, "task_loss": 0.1381833851337433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7150251668151663, "compression_loss": 0.0, "distillation_loss": 0.3588885962963104, "epoch": 0.42, "learning_rate": 4.9555702567313235e-05, "loss": 0.3396, "step": 446, "task_loss": 0.16621676087379456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7150571122408411, "compression_loss": 0.0, "distillation_loss": 0.26380306482315063, "epoch": 0.42, "learning_rate": 4.9553700442100146e-05, "loss": 0.2656, "step": 447, "task_loss": 0.28156140446662903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7150890496591402, "compression_loss": 0.0, "distillation_loss": 0.5757080912590027, "epoch": 0.43, "learning_rate": 4.955169385658912e-05, "loss": 0.5604, "step": 448, "task_loss": 0.42307889461517334 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7151209790710672, "compression_loss": 0.0, "distillation_loss": 0.4960484802722931, "epoch": 0.43, "learning_rate": 4.954968281114467e-05, "loss": 0.4804, "step": 449, "task_loss": 0.33929193019866943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7151529004776259, "compression_loss": 0.0, "distillation_loss": 0.6576845645904541, "epoch": 0.43, "learning_rate": 4.9547667306132096e-05, "loss": 0.6253, "step": 450, "task_loss": 0.33397895097732544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7151848138798199, "compression_loss": 0.0, "distillation_loss": 0.30960074067115784, "epoch": 0.43, "learning_rate": 4.954564734191753e-05, "loss": 0.2977, "step": 451, "task_loss": 0.1903936266899109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7152167192786528, "compression_loss": 0.0, "distillation_loss": 0.1117134764790535, "epoch": 0.43, "learning_rate": 4.9543622918867926e-05, "loss": 0.106, "step": 452, "task_loss": 0.05442768707871437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7152486166751285, "compression_loss": 0.0, "distillation_loss": 0.34328725934028625, "epoch": 0.43, "learning_rate": 4.9541594037351e-05, "loss": 0.3272, "step": 453, "task_loss": 0.1819160431623459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7152805060702505, "compression_loss": 0.0, "distillation_loss": 0.6085219979286194, "epoch": 0.43, "learning_rate": 4.953956069773534e-05, "loss": 0.5771, "step": 454, "task_loss": 0.29387348890304565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7153123874650225, "compression_loss": 0.0, "distillation_loss": 0.2967539429664612, "epoch": 0.43, "learning_rate": 4.953752290039028e-05, "loss": 0.2804, "step": 455, "task_loss": 0.13335244357585907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7153442608604482, "compression_loss": 0.0, "distillation_loss": 0.3782120943069458, "epoch": 0.43, "learning_rate": 4.953548064568602e-05, "loss": 0.3664, "step": 456, "task_loss": 0.2596074044704437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7153761262575314, "compression_loss": 0.0, "distillation_loss": 0.21655282378196716, "epoch": 0.43, "learning_rate": 4.953343393399354e-05, "loss": 0.2041, "step": 457, "task_loss": 0.09245874732732773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7154079836572756, "compression_loss": 0.0, "distillation_loss": 0.4280490279197693, "epoch": 0.43, "learning_rate": 4.953138276568462e-05, "loss": 0.4155, "step": 458, "task_loss": 0.3028221130371094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7154398330606846, "compression_loss": 0.0, "distillation_loss": 0.6100641489028931, "epoch": 0.44, "learning_rate": 4.952932714113188e-05, "loss": 0.5865, "step": 459, "task_loss": 0.3739990293979645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7154716744687621, "compression_loss": 0.0, "distillation_loss": 0.43047159910202026, "epoch": 0.44, "learning_rate": 4.9527267060708734e-05, "loss": 0.4066, "step": 460, "task_loss": 0.1912587583065033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7155035078825116, "compression_loss": 0.0, "distillation_loss": 0.5423746109008789, "epoch": 0.44, "learning_rate": 4.9525202524789397e-05, "loss": 0.5169, "step": 461, "task_loss": 0.28800442814826965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7155353333029371, "compression_loss": 0.0, "distillation_loss": 0.2084915190935135, "epoch": 0.44, "learning_rate": 4.952313353374891e-05, "loss": 0.1945, "step": 462, "task_loss": 0.06886687129735947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7155671507310419, "compression_loss": 0.0, "distillation_loss": 0.36711233854293823, "epoch": 0.44, "learning_rate": 4.952106008796311e-05, "loss": 0.3632, "step": 463, "task_loss": 0.3277187943458557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.71559896016783, "compression_loss": 0.0, "distillation_loss": 0.25077369809150696, "epoch": 0.44, "learning_rate": 4.9518982187808653e-05, "loss": 0.2342, "step": 464, "task_loss": 0.08512859791517258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7156307616143049, "compression_loss": 0.0, "distillation_loss": 0.3569698929786682, "epoch": 0.44, "learning_rate": 4.9516899833663e-05, "loss": 0.3385, "step": 465, "task_loss": 0.17190499603748322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7156625550714704, "compression_loss": 0.0, "distillation_loss": 0.2773820161819458, "epoch": 0.44, "learning_rate": 4.9514813025904413e-05, "loss": 0.2626, "step": 466, "task_loss": 0.12949100136756897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.71569434054033, "compression_loss": 0.0, "distillation_loss": 0.4447813630104065, "epoch": 0.44, "learning_rate": 4.951272176491197e-05, "loss": 0.4288, "step": 467, "task_loss": 0.28465670347213745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7157261180218876, "compression_loss": 0.0, "distillation_loss": 0.41461753845214844, "epoch": 0.44, "learning_rate": 4.951062605106557e-05, "loss": 0.4048, "step": 468, "task_loss": 0.3163371682167053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7157578875171468, "compression_loss": 0.0, "distillation_loss": 0.559532642364502, "epoch": 0.45, "learning_rate": 4.950852588474591e-05, "loss": 0.5354, "step": 469, "task_loss": 0.31830355525016785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7157896490271112, "compression_loss": 0.0, "distillation_loss": 0.28237810730934143, "epoch": 0.45, "learning_rate": 4.9506421266334475e-05, "loss": 0.2687, "step": 470, "task_loss": 0.14537358283996582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7158214025527846, "compression_loss": 0.0, "distillation_loss": 0.2132379114627838, "epoch": 0.45, "learning_rate": 4.9504312196213596e-05, "loss": 0.2078, "step": 471, "task_loss": 0.15892483294010162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7158531480951705, "compression_loss": 0.0, "distillation_loss": 0.205579936504364, "epoch": 0.45, "learning_rate": 4.95021986747664e-05, "loss": 0.1931, "step": 472, "task_loss": 0.08082406967878342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7158848856552729, "compression_loss": 0.0, "distillation_loss": 0.31995484232902527, "epoch": 0.45, "learning_rate": 4.9500080702376805e-05, "loss": 0.3075, "step": 473, "task_loss": 0.19546376168727875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7159166152340952, "compression_loss": 0.0, "distillation_loss": 0.5218741297721863, "epoch": 0.45, "learning_rate": 4.949795827942956e-05, "loss": 0.4946, "step": 474, "task_loss": 0.24919168651103973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7159483368326413, "compression_loss": 0.0, "distillation_loss": 0.49504172801971436, "epoch": 0.45, "learning_rate": 4.9495831406310205e-05, "loss": 0.4802, "step": 475, "task_loss": 0.34659019112586975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7159800504519146, "compression_loss": 0.0, "distillation_loss": 0.43946248292922974, "epoch": 0.45, "learning_rate": 4.94937000834051e-05, "loss": 0.4265, "step": 476, "task_loss": 0.31031233072280884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.716011756092919, "compression_loss": 0.0, "distillation_loss": 0.3730089068412781, "epoch": 0.45, "learning_rate": 4.9491564311101426e-05, "loss": 0.3571, "step": 477, "task_loss": 0.2140880525112152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7160434537566581, "compression_loss": 0.0, "distillation_loss": 0.22854942083358765, "epoch": 0.45, "learning_rate": 4.9489424089787125e-05, "loss": 0.214, "step": 478, "task_loss": 0.08312854915857315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7160751434441356, "compression_loss": 0.0, "distillation_loss": 0.6738406419754028, "epoch": 0.45, "learning_rate": 4.948727941985101e-05, "loss": 0.6537, "step": 479, "task_loss": 0.4723435044288635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7161068251563553, "compression_loss": 0.0, "distillation_loss": 0.44496259093284607, "epoch": 0.46, "learning_rate": 4.948513030168265e-05, "loss": 0.4353, "step": 480, "task_loss": 0.34817105531692505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7161384988943207, "compression_loss": 0.0, "distillation_loss": 0.5351232290267944, "epoch": 0.46, "learning_rate": 4.948297673567245e-05, "loss": 0.5138, "step": 481, "task_loss": 0.3218046724796295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7161701646590355, "compression_loss": 0.0, "distillation_loss": 0.5943255424499512, "epoch": 0.46, "learning_rate": 4.948081872221161e-05, "loss": 0.5659, "step": 482, "task_loss": 0.31032171845436096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7162018224515034, "compression_loss": 0.0, "distillation_loss": 0.482815146446228, "epoch": 0.46, "learning_rate": 4.9478656261692155e-05, "loss": 0.4567, "step": 483, "task_loss": 0.22122564911842346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7162334722727283, "compression_loss": 0.0, "distillation_loss": 0.695792019367218, "epoch": 0.46, "learning_rate": 4.947648935450689e-05, "loss": 0.6668, "step": 484, "task_loss": 0.40558087825775146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7162651141237135, "compression_loss": 0.0, "distillation_loss": 0.2511596977710724, "epoch": 0.46, "learning_rate": 4.947431800104947e-05, "loss": 0.2414, "step": 485, "task_loss": 0.15338845551013947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7162967480054631, "compression_loss": 0.0, "distillation_loss": 0.45596519112586975, "epoch": 0.46, "learning_rate": 4.94721422017143e-05, "loss": 0.4342, "step": 486, "task_loss": 0.2383052110671997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7163283739189804, "compression_loss": 0.0, "distillation_loss": 0.3888728618621826, "epoch": 0.46, "learning_rate": 4.946996195689665e-05, "loss": 0.3687, "step": 487, "task_loss": 0.18734854459762573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7163599918652693, "compression_loss": 0.0, "distillation_loss": 0.47673919796943665, "epoch": 0.46, "learning_rate": 4.9467777266992555e-05, "loss": 0.4547, "step": 488, "task_loss": 0.2561192512512207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7163916018453333, "compression_loss": 0.0, "distillation_loss": 0.3363496959209442, "epoch": 0.46, "learning_rate": 4.946558813239888e-05, "loss": 0.3277, "step": 489, "task_loss": 0.24951259791851044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7164232038601764, "compression_loss": 0.0, "distillation_loss": 0.3862308859825134, "epoch": 0.47, "learning_rate": 4.94633945535133e-05, "loss": 0.3749, "step": 490, "task_loss": 0.2733776569366455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.716454797910802, "compression_loss": 0.0, "distillation_loss": 0.2634069323539734, "epoch": 0.47, "learning_rate": 4.946119653073428e-05, "loss": 0.2535, "step": 491, "task_loss": 0.16414958238601685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.716486383998214, "compression_loss": 0.0, "distillation_loss": 0.37143170833587646, "epoch": 0.47, "learning_rate": 4.9458994064461103e-05, "loss": 0.3589, "step": 492, "task_loss": 0.24603454768657684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7165179621234158, "compression_loss": 0.0, "distillation_loss": 0.24793553352355957, "epoch": 0.47, "learning_rate": 4.945678715509386e-05, "loss": 0.2349, "step": 493, "task_loss": 0.11790718138217926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7165495322874113, "compression_loss": 0.0, "distillation_loss": 0.21886222064495087, "epoch": 0.47, "learning_rate": 4.9454575803033445e-05, "loss": 0.2139, "step": 494, "task_loss": 0.1692572385072708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7165810944912041, "compression_loss": 0.0, "distillation_loss": 0.3721332550048828, "epoch": 0.47, "learning_rate": 4.945236000868156e-05, "loss": 0.3528, "step": 495, "task_loss": 0.17849226295948029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7166126487357979, "compression_loss": 0.0, "distillation_loss": 0.4568150043487549, "epoch": 0.47, "learning_rate": 4.9450139772440715e-05, "loss": 0.4353, "step": 496, "task_loss": 0.24134021997451782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7166441950221963, "compression_loss": 0.0, "distillation_loss": 0.20982536673545837, "epoch": 0.47, "learning_rate": 4.944791509471423e-05, "loss": 0.204, "step": 497, "task_loss": 0.1519790142774582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7166757333514032, "compression_loss": 0.0, "distillation_loss": 0.30614525079727173, "epoch": 0.47, "learning_rate": 4.944568597590622e-05, "loss": 0.288, "step": 498, "task_loss": 0.12448100000619888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7167072637244222, "compression_loss": 0.0, "distillation_loss": 0.4257122874259949, "epoch": 0.47, "learning_rate": 4.944345241642162e-05, "loss": 0.415, "step": 499, "task_loss": 0.3188416361808777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7167387861422567, "compression_loss": 0.0, "distillation_loss": 0.23117899894714355, "epoch": 0.47, "learning_rate": 4.944121441666617e-05, "loss": 0.2231, "step": 500, "task_loss": 0.15056748688220978 }, { "epoch": 0.47, "eval_accuracy": 0.8956422018348624, "eval_loss": 0.38681745529174805, "eval_runtime": 15.2332, "eval_samples_per_second": 57.243, "eval_steps_per_second": 7.155, "step": 500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7167703006059108, "compression_loss": 0.0, "distillation_loss": 0.20744021236896515, "epoch": 0.48, "learning_rate": 4.943897197704642e-05, "loss": 0.1999, "step": 501, "task_loss": 0.13241733610630035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.716801807116388, "compression_loss": 0.0, "distillation_loss": 0.7784759402275085, "epoch": 0.48, "learning_rate": 4.9436725097969696e-05, "loss": 0.7514, "step": 502, "task_loss": 0.5077430605888367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7168333056746918, "compression_loss": 0.0, "distillation_loss": 0.5257222652435303, "epoch": 0.48, "learning_rate": 4.943447377984418e-05, "loss": 0.5057, "step": 503, "task_loss": 0.32530543208122253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7168647962818263, "compression_loss": 0.0, "distillation_loss": 0.13662202656269073, "epoch": 0.48, "learning_rate": 4.943221802307882e-05, "loss": 0.1346, "step": 504, "task_loss": 0.11593800038099289 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7168962789387948, "compression_loss": 0.0, "distillation_loss": 0.3498018980026245, "epoch": 0.48, "learning_rate": 4.942995782808339e-05, "loss": 0.3305, "step": 505, "task_loss": 0.15636515617370605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7169277536466011, "compression_loss": 0.0, "distillation_loss": 0.3136255145072937, "epoch": 0.48, "learning_rate": 4.9427693195268466e-05, "loss": 0.3083, "step": 506, "task_loss": 0.26005005836486816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7169592204062489, "compression_loss": 0.0, "distillation_loss": 0.27342021465301514, "epoch": 0.48, "learning_rate": 4.942542412504543e-05, "loss": 0.2671, "step": 507, "task_loss": 0.20997658371925354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.716990679218742, "compression_loss": 0.0, "distillation_loss": 0.19145192205905914, "epoch": 0.48, "learning_rate": 4.942315061782646e-05, "loss": 0.1941, "step": 508, "task_loss": 0.21748016774654388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.717022130085084, "compression_loss": 0.0, "distillation_loss": 0.21470308303833008, "epoch": 0.48, "learning_rate": 4.942087267402457e-05, "loss": 0.2198, "step": 509, "task_loss": 0.26588204503059387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7170535730062784, "compression_loss": 0.0, "distillation_loss": 0.3804921507835388, "epoch": 0.48, "learning_rate": 4.941859029405353e-05, "loss": 0.3673, "step": 510, "task_loss": 0.24898777902126312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7170850079833291, "compression_loss": 0.0, "distillation_loss": 0.3862866759300232, "epoch": 0.49, "learning_rate": 4.9416303478327974e-05, "loss": 0.3702, "step": 511, "task_loss": 0.22553594410419464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7171164350172397, "compression_loss": 0.0, "distillation_loss": 0.10008936375379562, "epoch": 0.49, "learning_rate": 4.9414012227263295e-05, "loss": 0.0956, "step": 512, "task_loss": 0.05475004017353058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7171478541090139, "compression_loss": 0.0, "distillation_loss": 0.30363941192626953, "epoch": 0.49, "learning_rate": 4.941171654127572e-05, "loss": 0.2881, "step": 513, "task_loss": 0.14781013131141663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7171792652596554, "compression_loss": 0.0, "distillation_loss": 0.5057163238525391, "epoch": 0.49, "learning_rate": 4.9409416420782264e-05, "loss": 0.4856, "step": 514, "task_loss": 0.3049323260784149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7172106684701678, "compression_loss": 0.0, "distillation_loss": 0.1923336535692215, "epoch": 0.49, "learning_rate": 4.940711186620076e-05, "loss": 0.1797, "step": 515, "task_loss": 0.06598946452140808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7172420637415549, "compression_loss": 0.0, "distillation_loss": 0.5293185710906982, "epoch": 0.49, "learning_rate": 4.9404802877949843e-05, "loss": 0.5265, "step": 516, "task_loss": 0.5010278820991516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7172734510748203, "compression_loss": 0.0, "distillation_loss": 0.24947986006736755, "epoch": 0.49, "learning_rate": 4.940248945644894e-05, "loss": 0.2324, "step": 517, "task_loss": 0.07890691608190536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7173048304709678, "compression_loss": 0.0, "distillation_loss": 0.27918702363967896, "epoch": 0.49, "learning_rate": 4.9400171602118306e-05, "loss": 0.2666, "step": 518, "task_loss": 0.15293952822685242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7173362019310009, "compression_loss": 0.0, "distillation_loss": 0.4333661198616028, "epoch": 0.49, "learning_rate": 4.939784931537899e-05, "loss": 0.4191, "step": 519, "task_loss": 0.29058921337127686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7173675654559234, "compression_loss": 0.0, "distillation_loss": 0.27411600947380066, "epoch": 0.49, "learning_rate": 4.9395522596652846e-05, "loss": 0.2661, "step": 520, "task_loss": 0.1940668672323227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.717398921046739, "compression_loss": 0.0, "distillation_loss": 0.16018861532211304, "epoch": 0.49, "learning_rate": 4.939319144636253e-05, "loss": 0.1733, "step": 521, "task_loss": 0.2914007306098938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7174302687044513, "compression_loss": 0.0, "distillation_loss": 0.2684335708618164, "epoch": 0.5, "learning_rate": 4.9390855864931504e-05, "loss": 0.2554, "step": 522, "task_loss": 0.13780063390731812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7174616084300639, "compression_loss": 0.0, "distillation_loss": 0.20900654792785645, "epoch": 0.5, "learning_rate": 4.938851585278405e-05, "loss": 0.1965, "step": 523, "task_loss": 0.08378442376852036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7174929402245807, "compression_loss": 0.0, "distillation_loss": 0.2784028649330139, "epoch": 0.5, "learning_rate": 4.938617141034523e-05, "loss": 0.2624, "step": 524, "task_loss": 0.11791129410266876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7175242640890053, "compression_loss": 0.0, "distillation_loss": 0.12981046736240387, "epoch": 0.5, "learning_rate": 4.938382253804094e-05, "loss": 0.1356, "step": 525, "task_loss": 0.18778133392333984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7175555800243414, "compression_loss": 0.0, "distillation_loss": 0.4506103992462158, "epoch": 0.5, "learning_rate": 4.938146923629784e-05, "loss": 0.4333, "step": 526, "task_loss": 0.2775367796421051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7175868880315925, "compression_loss": 0.0, "distillation_loss": 0.3384746313095093, "epoch": 0.5, "learning_rate": 4.937911150554343e-05, "loss": 0.3378, "step": 527, "task_loss": 0.3314347267150879 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7176181881117626, "compression_loss": 0.0, "distillation_loss": 0.2652072608470917, "epoch": 0.5, "learning_rate": 4.9376749346206006e-05, "loss": 0.2549, "step": 528, "task_loss": 0.16171765327453613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7176494802658551, "compression_loss": 0.0, "distillation_loss": 0.1128808930516243, "epoch": 0.5, "learning_rate": 4.937438275871467e-05, "loss": 0.1051, "step": 529, "task_loss": 0.034815624356269836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7176807644948738, "compression_loss": 0.0, "distillation_loss": 0.335213840007782, "epoch": 0.5, "learning_rate": 4.9372011743499315e-05, "loss": 0.3248, "step": 530, "task_loss": 0.23120583593845367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7177120407998224, "compression_loss": 0.0, "distillation_loss": 0.3072986900806427, "epoch": 0.5, "learning_rate": 4.9369636300990645e-05, "loss": 0.3177, "step": 531, "task_loss": 0.41148343682289124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7177433091817046, "compression_loss": 0.0, "distillation_loss": 0.3255481421947479, "epoch": 0.51, "learning_rate": 4.936725643162018e-05, "loss": 0.3308, "step": 532, "task_loss": 0.37787866592407227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.717774569641524, "compression_loss": 0.0, "distillation_loss": 0.8519558906555176, "epoch": 0.51, "learning_rate": 4.936487213582023e-05, "loss": 0.8174, "step": 533, "task_loss": 0.5065959095954895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7178058221802843, "compression_loss": 0.0, "distillation_loss": 0.20490780472755432, "epoch": 0.51, "learning_rate": 4.9362483414023905e-05, "loss": 0.2188, "step": 534, "task_loss": 0.34347814321517944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7178370667989893, "compression_loss": 0.0, "distillation_loss": 0.2214491367340088, "epoch": 0.51, "learning_rate": 4.936009026666515e-05, "loss": 0.2179, "step": 535, "task_loss": 0.18594780564308167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7178683034986425, "compression_loss": 0.0, "distillation_loss": 0.2852451801300049, "epoch": 0.51, "learning_rate": 4.935769269417867e-05, "loss": 0.2706, "step": 536, "task_loss": 0.13881553709506989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7178995322802476, "compression_loss": 0.0, "distillation_loss": 0.16195335984230042, "epoch": 0.51, "learning_rate": 4.935529069700001e-05, "loss": 0.1512, "step": 537, "task_loss": 0.05430576950311661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7179307531448085, "compression_loss": 0.0, "distillation_loss": 0.2246689647436142, "epoch": 0.51, "learning_rate": 4.935288427556549e-05, "loss": 0.2136, "step": 538, "task_loss": 0.11377856135368347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7179619660933286, "compression_loss": 0.0, "distillation_loss": 0.17521658539772034, "epoch": 0.51, "learning_rate": 4.935047343031227e-05, "loss": 0.1673, "step": 539, "task_loss": 0.0958407074213028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7179931711268118, "compression_loss": 0.0, "distillation_loss": 0.4904990792274475, "epoch": 0.51, "learning_rate": 4.934805816167827e-05, "loss": 0.4757, "step": 540, "task_loss": 0.3428148627281189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7180243682462617, "compression_loss": 0.0, "distillation_loss": 0.362811416387558, "epoch": 0.51, "learning_rate": 4.934563847010224e-05, "loss": 0.3665, "step": 541, "task_loss": 0.39953047037124634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7180555574526819, "compression_loss": 0.0, "distillation_loss": 0.2322240173816681, "epoch": 0.51, "learning_rate": 4.934321435602374e-05, "loss": 0.2193, "step": 542, "task_loss": 0.10299921035766602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7180867387470763, "compression_loss": 0.0, "distillation_loss": 0.15493597090244293, "epoch": 0.52, "learning_rate": 4.934078581988311e-05, "loss": 0.1526, "step": 543, "task_loss": 0.13205915689468384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7181179121304484, "compression_loss": 0.0, "distillation_loss": 0.4433606266975403, "epoch": 0.52, "learning_rate": 4.933835286212151e-05, "loss": 0.4219, "step": 544, "task_loss": 0.22923287749290466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7181490776038018, "compression_loss": 0.0, "distillation_loss": 0.3247934579849243, "epoch": 0.52, "learning_rate": 4.9335915483180896e-05, "loss": 0.3249, "step": 545, "task_loss": 0.3255178928375244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7181802351681404, "compression_loss": 0.0, "distillation_loss": 0.6595593690872192, "epoch": 0.52, "learning_rate": 4.9333473683504025e-05, "loss": 0.6289, "step": 546, "task_loss": 0.35308799147605896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7182113848244678, "compression_loss": 0.0, "distillation_loss": 0.3609609603881836, "epoch": 0.52, "learning_rate": 4.9331027463534484e-05, "loss": 0.3433, "step": 547, "task_loss": 0.18484395742416382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7182425265737876, "compression_loss": 0.0, "distillation_loss": 0.4003904163837433, "epoch": 0.52, "learning_rate": 4.932857682371661e-05, "loss": 0.3825, "step": 548, "task_loss": 0.22114846110343933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7182736604171036, "compression_loss": 0.0, "distillation_loss": 0.3539576828479767, "epoch": 0.52, "learning_rate": 4.9326121764495596e-05, "loss": 0.3378, "step": 549, "task_loss": 0.19238103926181793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7183047863554195, "compression_loss": 0.0, "distillation_loss": 0.09516200423240662, "epoch": 0.52, "learning_rate": 4.932366228631741e-05, "loss": 0.0975, "step": 550, "task_loss": 0.11850506067276001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7183359043897388, "compression_loss": 0.0, "distillation_loss": 0.784862220287323, "epoch": 0.52, "learning_rate": 4.932119838962882e-05, "loss": 0.7474, "step": 551, "task_loss": 0.41059941053390503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7183670145210653, "compression_loss": 0.0, "distillation_loss": 0.3884808421134949, "epoch": 0.52, "learning_rate": 4.931873007487741e-05, "loss": 0.3695, "step": 552, "task_loss": 0.19908683001995087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7183981167504028, "compression_loss": 0.0, "distillation_loss": 0.1411825716495514, "epoch": 0.53, "learning_rate": 4.9316257342511565e-05, "loss": 0.1397, "step": 553, "task_loss": 0.12602877616882324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7184292110787548, "compression_loss": 0.0, "distillation_loss": 0.27567747235298157, "epoch": 0.53, "learning_rate": 4.9313780192980466e-05, "loss": 0.259, "step": 554, "task_loss": 0.10907714068889618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.718460297507125, "compression_loss": 0.0, "distillation_loss": 0.23973673582077026, "epoch": 0.53, "learning_rate": 4.9311298626734095e-05, "loss": 0.2289, "step": 555, "task_loss": 0.13170108199119568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7184913760365172, "compression_loss": 0.0, "distillation_loss": 0.3157758116722107, "epoch": 0.53, "learning_rate": 4.9308812644223245e-05, "loss": 0.2988, "step": 556, "task_loss": 0.14566126465797424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7185224466679351, "compression_loss": 0.0, "distillation_loss": 0.19598382711410522, "epoch": 0.53, "learning_rate": 4.9306322245899505e-05, "loss": 0.187, "step": 557, "task_loss": 0.10587802529335022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.718553509402382, "compression_loss": 0.0, "distillation_loss": 0.26901909708976746, "epoch": 0.53, "learning_rate": 4.930382743221528e-05, "loss": 0.2577, "step": 558, "task_loss": 0.1556887924671173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7185845642408621, "compression_loss": 0.0, "distillation_loss": 0.4021989703178406, "epoch": 0.53, "learning_rate": 4.930132820362374e-05, "loss": 0.3864, "step": 559, "task_loss": 0.24379751086235046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7186156111843789, "compression_loss": 0.0, "distillation_loss": 0.4040166735649109, "epoch": 0.53, "learning_rate": 4.9298824560578895e-05, "loss": 0.387, "step": 560, "task_loss": 0.2337295562028885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7186466502339359, "compression_loss": 0.0, "distillation_loss": 0.5355952978134155, "epoch": 0.53, "learning_rate": 4.929631650353555e-05, "loss": 0.5101, "step": 561, "task_loss": 0.28034472465515137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.718677681390537, "compression_loss": 0.0, "distillation_loss": 0.22372055053710938, "epoch": 0.53, "learning_rate": 4.92938040329493e-05, "loss": 0.2111, "step": 562, "task_loss": 0.09713706374168396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7187087046551858, "compression_loss": 0.0, "distillation_loss": 0.5858652591705322, "epoch": 0.53, "learning_rate": 4.9291287149276544e-05, "loss": 0.5583, "step": 563, "task_loss": 0.31010231375694275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7187397200288859, "compression_loss": 0.0, "distillation_loss": 0.20850016176700592, "epoch": 0.54, "learning_rate": 4.928876585297448e-05, "loss": 0.2016, "step": 564, "task_loss": 0.1396273374557495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7187707275126411, "compression_loss": 0.0, "distillation_loss": 0.2773027718067169, "epoch": 0.54, "learning_rate": 4.9286240144501136e-05, "loss": 0.262, "step": 565, "task_loss": 0.1242566779255867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.718801727107455, "compression_loss": 0.0, "distillation_loss": 0.11861973255872726, "epoch": 0.54, "learning_rate": 4.928371002431531e-05, "loss": 0.1102, "step": 566, "task_loss": 0.03490396589040756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7188327188143315, "compression_loss": 0.0, "distillation_loss": 0.04367408901453018, "epoch": 0.54, "learning_rate": 4.92811754928766e-05, "loss": 0.0404, "step": 567, "task_loss": 0.010761696845293045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.718863702634274, "compression_loss": 0.0, "distillation_loss": 0.5102982521057129, "epoch": 0.54, "learning_rate": 4.927863655064542e-05, "loss": 0.4859, "step": 568, "task_loss": 0.2663833200931549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7188946785682863, "compression_loss": 0.0, "distillation_loss": 0.5678519010543823, "epoch": 0.54, "learning_rate": 4.9276093198082986e-05, "loss": 0.5548, "step": 569, "task_loss": 0.4374026358127594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7189256466173721, "compression_loss": 0.0, "distillation_loss": 0.2537938952445984, "epoch": 0.54, "learning_rate": 4.92735454356513e-05, "loss": 0.2444, "step": 570, "task_loss": 0.15972843766212463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.718956606782535, "compression_loss": 0.0, "distillation_loss": 0.37457334995269775, "epoch": 0.54, "learning_rate": 4.927099326381319e-05, "loss": 0.3575, "step": 571, "task_loss": 0.20346179604530334 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7189875590647787, "compression_loss": 0.0, "distillation_loss": 0.4022772014141083, "epoch": 0.54, "learning_rate": 4.926843668303227e-05, "loss": 0.3899, "step": 572, "task_loss": 0.27801287174224854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7190185034651071, "compression_loss": 0.0, "distillation_loss": 0.21378515660762787, "epoch": 0.54, "learning_rate": 4.926587569377293e-05, "loss": 0.202, "step": 573, "task_loss": 0.09593548625707626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7190494399845236, "compression_loss": 0.0, "distillation_loss": 0.2173890620470047, "epoch": 0.55, "learning_rate": 4.926331029650042e-05, "loss": 0.2096, "step": 574, "task_loss": 0.1397651731967926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.719080368624032, "compression_loss": 0.0, "distillation_loss": 0.0907929316163063, "epoch": 0.55, "learning_rate": 4.926074049168074e-05, "loss": 0.0843, "step": 575, "task_loss": 0.026197172701358795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.719111289384636, "compression_loss": 0.0, "distillation_loss": 0.281998872756958, "epoch": 0.55, "learning_rate": 4.9258166279780704e-05, "loss": 0.2744, "step": 576, "task_loss": 0.20600730180740356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7191422022673393, "compression_loss": 0.0, "distillation_loss": 0.30183589458465576, "epoch": 0.55, "learning_rate": 4.925558766126794e-05, "loss": 0.2886, "step": 577, "task_loss": 0.16912756860256195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7191731072731453, "compression_loss": 0.0, "distillation_loss": 0.49650150537490845, "epoch": 0.55, "learning_rate": 4.9253004636610856e-05, "loss": 0.4803, "step": 578, "task_loss": 0.33414289355278015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7192040044030581, "compression_loss": 0.0, "distillation_loss": 0.39063113927841187, "epoch": 0.55, "learning_rate": 4.925041720627868e-05, "loss": 0.3818, "step": 579, "task_loss": 0.301964670419693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7192348936580812, "compression_loss": 0.0, "distillation_loss": 0.34917062520980835, "epoch": 0.55, "learning_rate": 4.9247825370741416e-05, "loss": 0.336, "step": 580, "task_loss": 0.2170882225036621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7192657750392183, "compression_loss": 0.0, "distillation_loss": 0.17031094431877136, "epoch": 0.55, "learning_rate": 4.924522913046991e-05, "loss": 0.1716, "step": 581, "task_loss": 0.18353326618671417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.719296648547473, "compression_loss": 0.0, "distillation_loss": 0.10322088748216629, "epoch": 0.55, "learning_rate": 4.924262848593576e-05, "loss": 0.0994, "step": 582, "task_loss": 0.0652477964758873 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7193275141838491, "compression_loss": 0.0, "distillation_loss": 0.11138247698545456, "epoch": 0.55, "learning_rate": 4.924002343761139e-05, "loss": 0.1033, "step": 583, "task_loss": 0.030367694795131683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7193583719493502, "compression_loss": 0.0, "distillation_loss": 0.8268933892250061, "epoch": 0.55, "learning_rate": 4.923741398597002e-05, "loss": 0.7975, "step": 584, "task_loss": 0.5331718921661377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7193892218449799, "compression_loss": 0.0, "distillation_loss": 0.535077691078186, "epoch": 0.56, "learning_rate": 4.9234800131485675e-05, "loss": 0.5079, "step": 585, "task_loss": 0.263676255941391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7194200638717421, "compression_loss": 0.0, "distillation_loss": 0.2574825882911682, "epoch": 0.56, "learning_rate": 4.9232181874633164e-05, "loss": 0.2479, "step": 586, "task_loss": 0.16171754896640778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7194508980306403, "compression_loss": 0.0, "distillation_loss": 0.28252261877059937, "epoch": 0.56, "learning_rate": 4.922955921588812e-05, "loss": 0.2685, "step": 587, "task_loss": 0.14232273399829865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7194817243226783, "compression_loss": 0.0, "distillation_loss": 0.5040257573127747, "epoch": 0.56, "learning_rate": 4.922693215572695e-05, "loss": 0.4797, "step": 588, "task_loss": 0.2605317533016205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7195125427488598, "compression_loss": 0.0, "distillation_loss": 0.2037096619606018, "epoch": 0.56, "learning_rate": 4.922430069462688e-05, "loss": 0.1908, "step": 589, "task_loss": 0.07464735209941864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7195433533101884, "compression_loss": 0.0, "distillation_loss": 0.21558362245559692, "epoch": 0.56, "learning_rate": 4.9221664833065914e-05, "loss": 0.2088, "step": 590, "task_loss": 0.14768168330192566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7195741560076677, "compression_loss": 0.0, "distillation_loss": 0.40281903743743896, "epoch": 0.56, "learning_rate": 4.921902457152289e-05, "loss": 0.3805, "step": 591, "task_loss": 0.18012678623199463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7196049508423016, "compression_loss": 0.0, "distillation_loss": 0.3910696804523468, "epoch": 0.56, "learning_rate": 4.9216379910477403e-05, "loss": 0.3727, "step": 592, "task_loss": 0.20783747732639313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7196357378150936, "compression_loss": 0.0, "distillation_loss": 0.24493408203125, "epoch": 0.56, "learning_rate": 4.921373085040988e-05, "loss": 0.2339, "step": 593, "task_loss": 0.13446438312530518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7196665169270474, "compression_loss": 0.0, "distillation_loss": 0.13932454586029053, "epoch": 0.56, "learning_rate": 4.921107739180153e-05, "loss": 0.1317, "step": 594, "task_loss": 0.06355321407318115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7196972881791668, "compression_loss": 0.0, "distillation_loss": 0.395768940448761, "epoch": 0.57, "learning_rate": 4.9208419535134376e-05, "loss": 0.384, "step": 595, "task_loss": 0.2776716947555542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7197280515724555, "compression_loss": 0.0, "distillation_loss": 0.24235188961029053, "epoch": 0.57, "learning_rate": 4.920575728089122e-05, "loss": 0.2304, "step": 596, "task_loss": 0.1224876269698143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.719758807107917, "compression_loss": 0.0, "distillation_loss": 0.11212563514709473, "epoch": 0.57, "learning_rate": 4.920309062955568e-05, "loss": 0.1317, "step": 597, "task_loss": 0.3079983592033386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.719789554786555, "compression_loss": 0.0, "distillation_loss": 0.3031010627746582, "epoch": 0.57, "learning_rate": 4.920041958161217e-05, "loss": 0.2929, "step": 598, "task_loss": 0.20059747993946075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7198202946093734, "compression_loss": 0.0, "distillation_loss": 0.6109755039215088, "epoch": 0.57, "learning_rate": 4.9197744137545884e-05, "loss": 0.5924, "step": 599, "task_loss": 0.4249090254306793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7198510265773757, "compression_loss": 0.0, "distillation_loss": 0.33642804622650146, "epoch": 0.57, "learning_rate": 4.919506429784284e-05, "loss": 0.3205, "step": 600, "task_loss": 0.17708787322044373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7198817506915656, "compression_loss": 0.0, "distillation_loss": 0.191384956240654, "epoch": 0.57, "learning_rate": 4.919238006298984e-05, "loss": 0.2127, "step": 601, "task_loss": 0.40497034788131714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7199124669529468, "compression_loss": 0.0, "distillation_loss": 0.19533386826515198, "epoch": 0.57, "learning_rate": 4.9189691433474494e-05, "loss": 0.1888, "step": 602, "task_loss": 0.12987524271011353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.719943175362523, "compression_loss": 0.0, "distillation_loss": 0.3835797607898712, "epoch": 0.57, "learning_rate": 4.91869984097852e-05, "loss": 0.3607, "step": 603, "task_loss": 0.15473908185958862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7199738759212977, "compression_loss": 0.0, "distillation_loss": 0.2762410342693329, "epoch": 0.57, "learning_rate": 4.918430099241116e-05, "loss": 0.2624, "step": 604, "task_loss": 0.137907475233078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.720004568630275, "compression_loss": 0.0, "distillation_loss": 0.11789911985397339, "epoch": 0.57, "learning_rate": 4.918159918184236e-05, "loss": 0.1095, "step": 605, "task_loss": 0.03381138667464256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7200352534904582, "compression_loss": 0.0, "distillation_loss": 0.26301029324531555, "epoch": 0.58, "learning_rate": 4.9178892978569625e-05, "loss": 0.2488, "step": 606, "task_loss": 0.12070365250110626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7200659305028512, "compression_loss": 0.0, "distillation_loss": 0.1380884349346161, "epoch": 0.58, "learning_rate": 4.9176182383084524e-05, "loss": 0.1346, "step": 607, "task_loss": 0.10351494699716568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7200965996684575, "compression_loss": 0.0, "distillation_loss": 0.4840831756591797, "epoch": 0.58, "learning_rate": 4.917346739587946e-05, "loss": 0.4609, "step": 608, "task_loss": 0.25223809480667114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.720127260988281, "compression_loss": 0.0, "distillation_loss": 0.28585392236709595, "epoch": 0.58, "learning_rate": 4.917074801744763e-05, "loss": 0.2685, "step": 609, "task_loss": 0.112055703997612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7201579144633251, "compression_loss": 0.0, "distillation_loss": 0.379632830619812, "epoch": 0.58, "learning_rate": 4.916802424828301e-05, "loss": 0.3577, "step": 610, "task_loss": 0.15994824469089508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7201885600945938, "compression_loss": 0.0, "distillation_loss": 0.3786671757698059, "epoch": 0.58, "learning_rate": 4.9165296088880384e-05, "loss": 0.3794, "step": 611, "task_loss": 0.38576364517211914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7202191978830905, "compression_loss": 0.0, "distillation_loss": 0.2917826771736145, "epoch": 0.58, "learning_rate": 4.916256353973535e-05, "loss": 0.2759, "step": 612, "task_loss": 0.13341909646987915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7202498278298191, "compression_loss": 0.0, "distillation_loss": 0.24248436093330383, "epoch": 0.58, "learning_rate": 4.9159826601344286e-05, "loss": 0.2274, "step": 613, "task_loss": 0.0919809564948082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7202804499357831, "compression_loss": 0.0, "distillation_loss": 0.12847685813903809, "epoch": 0.58, "learning_rate": 4.915708527420435e-05, "loss": 0.1231, "step": 614, "task_loss": 0.0744004100561142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7203110642019863, "compression_loss": 0.0, "distillation_loss": 0.1666882485151291, "epoch": 0.58, "learning_rate": 4.9154339558813546e-05, "loss": 0.1631, "step": 615, "task_loss": 0.13053244352340698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7203416706294323, "compression_loss": 0.0, "distillation_loss": 0.7391297817230225, "epoch": 0.58, "learning_rate": 4.915158945567062e-05, "loss": 0.7177, "step": 616, "task_loss": 0.5249520540237427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.720372269219125, "compression_loss": 0.0, "distillation_loss": 0.18314534425735474, "epoch": 0.59, "learning_rate": 4.914883496527516e-05, "loss": 0.1906, "step": 617, "task_loss": 0.2574879825115204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7204028599720679, "compression_loss": 0.0, "distillation_loss": 0.38090646266937256, "epoch": 0.59, "learning_rate": 4.914607608812753e-05, "loss": 0.3593, "step": 618, "task_loss": 0.1652752161026001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7204334428892646, "compression_loss": 0.0, "distillation_loss": 0.1799452304840088, "epoch": 0.59, "learning_rate": 4.9143312824728896e-05, "loss": 0.1885, "step": 619, "task_loss": 0.26563724875450134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7204640179717189, "compression_loss": 0.0, "distillation_loss": 0.2368253916501999, "epoch": 0.59, "learning_rate": 4.91405451755812e-05, "loss": 0.2235, "step": 620, "task_loss": 0.10326235741376877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7204945852204344, "compression_loss": 0.0, "distillation_loss": 0.5095282196998596, "epoch": 0.59, "learning_rate": 4.913777314118721e-05, "loss": 0.4846, "step": 621, "task_loss": 0.2607031464576721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7205251446364149, "compression_loss": 0.0, "distillation_loss": 0.12455812841653824, "epoch": 0.59, "learning_rate": 4.9134996722050483e-05, "loss": 0.1324, "step": 622, "task_loss": 0.20319408178329468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7205556962206641, "compression_loss": 0.0, "distillation_loss": 0.3534661531448364, "epoch": 0.59, "learning_rate": 4.913221591867537e-05, "loss": 0.3352, "step": 623, "task_loss": 0.17124255001544952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7205862399741856, "compression_loss": 0.0, "distillation_loss": 0.06430540978908539, "epoch": 0.59, "learning_rate": 4.912943073156701e-05, "loss": 0.0592, "step": 624, "task_loss": 0.013450298458337784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.720616775897983, "compression_loss": 0.0, "distillation_loss": 0.26247042417526245, "epoch": 0.59, "learning_rate": 4.912664116123134e-05, "loss": 0.2455, "step": 625, "task_loss": 0.09270986914634705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7206473039930601, "compression_loss": 0.0, "distillation_loss": 0.5087012052536011, "epoch": 0.59, "learning_rate": 4.9123847208175126e-05, "loss": 0.4844, "step": 626, "task_loss": 0.2655639946460724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7206778242604206, "compression_loss": 0.0, "distillation_loss": 0.2001875340938568, "epoch": 0.6, "learning_rate": 4.912104887290587e-05, "loss": 0.1939, "step": 627, "task_loss": 0.13689836859703064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7207083367010682, "compression_loss": 0.0, "distillation_loss": 0.19963237643241882, "epoch": 0.6, "learning_rate": 4.911824615593193e-05, "loss": 0.1886, "step": 628, "task_loss": 0.08933916687965393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7207388413160064, "compression_loss": 0.0, "distillation_loss": 0.4403138756752014, "epoch": 0.6, "learning_rate": 4.9115439057762416e-05, "loss": 0.4213, "step": 629, "task_loss": 0.2505990266799927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.720769338106239, "compression_loss": 0.0, "distillation_loss": 0.4388275146484375, "epoch": 0.6, "learning_rate": 4.911262757890726e-05, "loss": 0.4296, "step": 630, "task_loss": 0.3462795615196228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7207998270727697, "compression_loss": 0.0, "distillation_loss": 0.33021849393844604, "epoch": 0.6, "learning_rate": 4.9109811719877166e-05, "loss": 0.3132, "step": 631, "task_loss": 0.16027683019638062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7208303082166022, "compression_loss": 0.0, "distillation_loss": 0.483683317899704, "epoch": 0.6, "learning_rate": 4.910699148118367e-05, "loss": 0.4708, "step": 632, "task_loss": 0.3550935983657837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7208607815387402, "compression_loss": 0.0, "distillation_loss": 0.39671921730041504, "epoch": 0.6, "learning_rate": 4.910416686333906e-05, "loss": 0.3752, "step": 633, "task_loss": 0.18194958567619324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7208912470401873, "compression_loss": 0.0, "distillation_loss": 0.5867143273353577, "epoch": 0.6, "learning_rate": 4.910133786685646e-05, "loss": 0.5733, "step": 634, "task_loss": 0.4523051083087921 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7209217047219472, "compression_loss": 0.0, "distillation_loss": 0.43349331617355347, "epoch": 0.6, "learning_rate": 4.9098504492249764e-05, "loss": 0.4114, "step": 635, "task_loss": 0.2126692235469818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7209521545850235, "compression_loss": 0.0, "distillation_loss": 0.35138991475105286, "epoch": 0.6, "learning_rate": 4.9095666740033664e-05, "loss": 0.3348, "step": 636, "task_loss": 0.18542218208312988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7209825966304201, "compression_loss": 0.0, "distillation_loss": 0.43535393476486206, "epoch": 0.6, "learning_rate": 4.9092824610723655e-05, "loss": 0.4145, "step": 637, "task_loss": 0.22690628468990326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7210130308591405, "compression_loss": 0.0, "distillation_loss": 0.2631605565547943, "epoch": 0.61, "learning_rate": 4.908997810483602e-05, "loss": 0.2534, "step": 638, "task_loss": 0.1657785326242447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7210434572721884, "compression_loss": 0.0, "distillation_loss": 0.4615224003791809, "epoch": 0.61, "learning_rate": 4.908712722288785e-05, "loss": 0.4336, "step": 639, "task_loss": 0.1823965460062027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7210738758705676, "compression_loss": 0.0, "distillation_loss": 0.3323667645454407, "epoch": 0.61, "learning_rate": 4.9084271965397014e-05, "loss": 0.3282, "step": 640, "task_loss": 0.290405809879303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7211042866552817, "compression_loss": 0.0, "distillation_loss": 0.27497389912605286, "epoch": 0.61, "learning_rate": 4.908141233288218e-05, "loss": 0.2726, "step": 641, "task_loss": 0.2515375018119812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7211346896273344, "compression_loss": 0.0, "distillation_loss": 0.1021830216050148, "epoch": 0.61, "learning_rate": 4.907854832586282e-05, "loss": 0.094, "step": 642, "task_loss": 0.020755015313625336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7211650847877293, "compression_loss": 0.0, "distillation_loss": 0.19680382311344147, "epoch": 0.61, "learning_rate": 4.907567994485919e-05, "loss": 0.1992, "step": 643, "task_loss": 0.2208843231201172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7211954721374703, "compression_loss": 0.0, "distillation_loss": 0.5825210809707642, "epoch": 0.61, "learning_rate": 4.9072807190392354e-05, "loss": 0.5626, "step": 644, "task_loss": 0.383215069770813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7212258516775608, "compression_loss": 0.0, "distillation_loss": 0.3526345491409302, "epoch": 0.61, "learning_rate": 4.906993006298416e-05, "loss": 0.3389, "step": 645, "task_loss": 0.21486197412014008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7212562234090046, "compression_loss": 0.0, "distillation_loss": 0.4113069772720337, "epoch": 0.61, "learning_rate": 4.9067048563157235e-05, "loss": 0.4089, "step": 646, "task_loss": 0.38709861040115356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7212865873328055, "compression_loss": 0.0, "distillation_loss": 0.27968546748161316, "epoch": 0.61, "learning_rate": 4.906416269143505e-05, "loss": 0.2654, "step": 647, "task_loss": 0.13687899708747864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.721316943449967, "compression_loss": 0.0, "distillation_loss": 0.4654969274997711, "epoch": 0.62, "learning_rate": 4.90612724483418e-05, "loss": 0.4424, "step": 648, "task_loss": 0.23491407930850983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7213472917614929, "compression_loss": 0.0, "distillation_loss": 0.35083064436912537, "epoch": 0.62, "learning_rate": 4.905837783440253e-05, "loss": 0.3333, "step": 649, "task_loss": 0.17527225613594055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7213776322683869, "compression_loss": 0.0, "distillation_loss": 0.7668882608413696, "epoch": 0.62, "learning_rate": 4.905547885014307e-05, "loss": 0.7316, "step": 650, "task_loss": 0.4138070046901703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7214079649716525, "compression_loss": 0.0, "distillation_loss": 0.25231415033340454, "epoch": 0.62, "learning_rate": 4.9052575496090016e-05, "loss": 0.2521, "step": 651, "task_loss": 0.2498636394739151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7214382898722936, "compression_loss": 0.0, "distillation_loss": 0.39607787132263184, "epoch": 0.62, "learning_rate": 4.904966777277079e-05, "loss": 0.3755, "step": 652, "task_loss": 0.19066929817199707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7214686069713138, "compression_loss": 0.0, "distillation_loss": 0.33866336941719055, "epoch": 0.62, "learning_rate": 4.9046755680713586e-05, "loss": 0.3344, "step": 653, "task_loss": 0.2955394387245178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7214989162697167, "compression_loss": 0.0, "distillation_loss": 0.26593947410583496, "epoch": 0.62, "learning_rate": 4.90438392204474e-05, "loss": 0.2494, "step": 654, "task_loss": 0.10030423104763031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7215292177685061, "compression_loss": 0.0, "distillation_loss": 0.4320967495441437, "epoch": 0.62, "learning_rate": 4.9040918392502026e-05, "loss": 0.4192, "step": 655, "task_loss": 0.30289995670318604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7215595114686856, "compression_loss": 0.0, "distillation_loss": 0.15407636761665344, "epoch": 0.62, "learning_rate": 4.903799319740804e-05, "loss": 0.145, "step": 656, "task_loss": 0.0632171630859375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.721589797371259, "compression_loss": 0.0, "distillation_loss": 0.11901634931564331, "epoch": 0.62, "learning_rate": 4.903506363569683e-05, "loss": 0.1128, "step": 657, "task_loss": 0.05649835988879204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7216200754772298, "compression_loss": 0.0, "distillation_loss": 0.2876704931259155, "epoch": 0.62, "learning_rate": 4.9032129707900556e-05, "loss": 0.283, "step": 658, "task_loss": 0.2414519190788269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7216503457876018, "compression_loss": 0.0, "distillation_loss": 0.1815006583929062, "epoch": 0.63, "learning_rate": 4.9029191414552165e-05, "loss": 0.1867, "step": 659, "task_loss": 0.23326468467712402 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7216806083033788, "compression_loss": 0.0, "distillation_loss": 0.28646424412727356, "epoch": 0.63, "learning_rate": 4.9026248756185445e-05, "loss": 0.2784, "step": 660, "task_loss": 0.20588558912277222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7217108630255642, "compression_loss": 0.0, "distillation_loss": 0.22158020734786987, "epoch": 0.63, "learning_rate": 4.902330173333492e-05, "loss": 0.2171, "step": 661, "task_loss": 0.17726564407348633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7217411099551618, "compression_loss": 0.0, "distillation_loss": 0.41758403182029724, "epoch": 0.63, "learning_rate": 4.9020350346535936e-05, "loss": 0.3971, "step": 662, "task_loss": 0.21297332644462585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7217713490931754, "compression_loss": 0.0, "distillation_loss": 0.16590705513954163, "epoch": 0.63, "learning_rate": 4.901739459632463e-05, "loss": 0.1547, "step": 663, "task_loss": 0.05394207686185837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7218015804406086, "compression_loss": 0.0, "distillation_loss": 0.22256065905094147, "epoch": 0.63, "learning_rate": 4.901443448323792e-05, "loss": 0.2239, "step": 664, "task_loss": 0.2363419383764267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.721831803998465, "compression_loss": 0.0, "distillation_loss": 0.17048153281211853, "epoch": 0.63, "learning_rate": 4.901147000781355e-05, "loss": 0.1603, "step": 665, "task_loss": 0.068704754114151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7218620197677484, "compression_loss": 0.0, "distillation_loss": 0.5146390199661255, "epoch": 0.63, "learning_rate": 4.9008501170589996e-05, "loss": 0.4834, "step": 666, "task_loss": 0.20194561779499054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7218922277494625, "compression_loss": 0.0, "distillation_loss": 0.27669647336006165, "epoch": 0.63, "learning_rate": 4.900552797210658e-05, "loss": 0.2729, "step": 667, "task_loss": 0.23846843838691711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7219224279446109, "compression_loss": 0.0, "distillation_loss": 0.5004544258117676, "epoch": 0.63, "learning_rate": 4.90025504129034e-05, "loss": 0.4772, "step": 668, "task_loss": 0.26774314045906067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7219526203541973, "compression_loss": 0.0, "distillation_loss": 0.15412703156471252, "epoch": 0.64, "learning_rate": 4.8999568493521345e-05, "loss": 0.1447, "step": 669, "task_loss": 0.0594392754137516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7219828049792254, "compression_loss": 0.0, "distillation_loss": 0.34250378608703613, "epoch": 0.64, "learning_rate": 4.899658221450208e-05, "loss": 0.3341, "step": 670, "task_loss": 0.2580875754356384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7220129818206988, "compression_loss": 0.0, "distillation_loss": 0.334232360124588, "epoch": 0.64, "learning_rate": 4.899359157638809e-05, "loss": 0.3119, "step": 671, "task_loss": 0.1106051504611969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7220431508796213, "compression_loss": 0.0, "distillation_loss": 0.15714368224143982, "epoch": 0.64, "learning_rate": 4.899059657972264e-05, "loss": 0.1483, "step": 672, "task_loss": 0.06857656687498093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7220733121569965, "compression_loss": 0.0, "distillation_loss": 0.47153952717781067, "epoch": 0.64, "learning_rate": 4.898759722504977e-05, "loss": 0.4422, "step": 673, "task_loss": 0.17770282924175262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7221034656538281, "compression_loss": 0.0, "distillation_loss": 0.24304763972759247, "epoch": 0.64, "learning_rate": 4.8984593512914356e-05, "loss": 0.2322, "step": 674, "task_loss": 0.1346360594034195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7221336113711199, "compression_loss": 0.0, "distillation_loss": 0.20216722786426544, "epoch": 0.64, "learning_rate": 4.898158544386201e-05, "loss": 0.2003, "step": 675, "task_loss": 0.18363113701343536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7221637493098754, "compression_loss": 0.0, "distillation_loss": 0.3209993243217468, "epoch": 0.64, "learning_rate": 4.897857301843917e-05, "loss": 0.3081, "step": 676, "task_loss": 0.19210395216941833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7221938794710984, "compression_loss": 0.0, "distillation_loss": 0.38004785776138306, "epoch": 0.64, "learning_rate": 4.897555623719306e-05, "loss": 0.3761, "step": 677, "task_loss": 0.34037119150161743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7222240018557925, "compression_loss": 0.0, "distillation_loss": 0.27278655767440796, "epoch": 0.64, "learning_rate": 4.897253510067169e-05, "loss": 0.2581, "step": 678, "task_loss": 0.12568269670009613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7222541164649614, "compression_loss": 0.0, "distillation_loss": 0.48017337918281555, "epoch": 0.64, "learning_rate": 4.896950960942387e-05, "loss": 0.4528, "step": 679, "task_loss": 0.20639115571975708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7222842232996088, "compression_loss": 0.0, "distillation_loss": 0.12991678714752197, "epoch": 0.65, "learning_rate": 4.896647976399919e-05, "loss": 0.1368, "step": 680, "task_loss": 0.19825144112110138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7223143223607386, "compression_loss": 0.0, "distillation_loss": 0.2807837724685669, "epoch": 0.65, "learning_rate": 4.896344556494804e-05, "loss": 0.2644, "step": 681, "task_loss": 0.11680983006954193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.722344413649354, "compression_loss": 0.0, "distillation_loss": 0.36504942178726196, "epoch": 0.65, "learning_rate": 4.8960407012821584e-05, "loss": 0.3483, "step": 682, "task_loss": 0.19777251780033112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7223744971664591, "compression_loss": 0.0, "distillation_loss": 0.24195873737335205, "epoch": 0.65, "learning_rate": 4.895736410817181e-05, "loss": 0.2359, "step": 683, "task_loss": 0.1811816245317459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7224045729130574, "compression_loss": 0.0, "distillation_loss": 0.17951583862304688, "epoch": 0.65, "learning_rate": 4.8954316851551465e-05, "loss": 0.1744, "step": 684, "task_loss": 0.12814725935459137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7224346408901526, "compression_loss": 0.0, "distillation_loss": 0.3897627592086792, "epoch": 0.65, "learning_rate": 4.895126524351409e-05, "loss": 0.3718, "step": 685, "task_loss": 0.20980247855186462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7224647010987484, "compression_loss": 0.0, "distillation_loss": 0.15180069208145142, "epoch": 0.65, "learning_rate": 4.8948209284614046e-05, "loss": 0.144, "step": 686, "task_loss": 0.073697030544281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7224947535398485, "compression_loss": 0.0, "distillation_loss": 0.3309364914894104, "epoch": 0.65, "learning_rate": 4.894514897540643e-05, "loss": 0.3178, "step": 687, "task_loss": 0.1999032199382782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7225247982144566, "compression_loss": 0.0, "distillation_loss": 0.5048620700836182, "epoch": 0.65, "learning_rate": 4.89420843164472e-05, "loss": 0.475, "step": 688, "task_loss": 0.20606492459774017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7225548351235763, "compression_loss": 0.0, "distillation_loss": 0.26741254329681396, "epoch": 0.65, "learning_rate": 4.893901530829304e-05, "loss": 0.2534, "step": 689, "task_loss": 0.12686991691589355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7225848642682113, "compression_loss": 0.0, "distillation_loss": 0.454744428396225, "epoch": 0.66, "learning_rate": 4.8935941951501463e-05, "loss": 0.4328, "step": 690, "task_loss": 0.2351890504360199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7226148856493654, "compression_loss": 0.0, "distillation_loss": 0.4148391783237457, "epoch": 0.66, "learning_rate": 4.893286424663075e-05, "loss": 0.391, "step": 691, "task_loss": 0.17629502713680267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7226448992680421, "compression_loss": 0.0, "distillation_loss": 0.3470875322818756, "epoch": 0.66, "learning_rate": 4.892978219423998e-05, "loss": 0.3301, "step": 692, "task_loss": 0.17764955759048462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7226749051252452, "compression_loss": 0.0, "distillation_loss": 0.31869742274284363, "epoch": 0.66, "learning_rate": 4.892669579488903e-05, "loss": 0.3036, "step": 693, "task_loss": 0.16786476969718933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7227049032219783, "compression_loss": 0.0, "distillation_loss": 0.24134713411331177, "epoch": 0.66, "learning_rate": 4.892360504913856e-05, "loss": 0.2292, "step": 694, "task_loss": 0.12003158777952194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7227348935592453, "compression_loss": 0.0, "distillation_loss": 0.19437089562416077, "epoch": 0.66, "learning_rate": 4.8920509957550016e-05, "loss": 0.1999, "step": 695, "task_loss": 0.24951303005218506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7227648761380496, "compression_loss": 0.0, "distillation_loss": 0.2377103865146637, "epoch": 0.66, "learning_rate": 4.8917410520685635e-05, "loss": 0.2238, "step": 696, "task_loss": 0.09895863384008408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7227948509593951, "compression_loss": 0.0, "distillation_loss": 0.09030158072710037, "epoch": 0.66, "learning_rate": 4.891430673910844e-05, "loss": 0.0912, "step": 697, "task_loss": 0.09956920146942139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7228248180242853, "compression_loss": 0.0, "distillation_loss": 0.20243610441684723, "epoch": 0.66, "learning_rate": 4.891119861338226e-05, "loss": 0.1953, "step": 698, "task_loss": 0.13071851432323456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7228547773337239, "compression_loss": 0.0, "distillation_loss": 0.1336769163608551, "epoch": 0.66, "learning_rate": 4.8908086144071694e-05, "loss": 0.1241, "step": 699, "task_loss": 0.03775034099817276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7228847288887148, "compression_loss": 0.0, "distillation_loss": 0.16907915472984314, "epoch": 0.66, "learning_rate": 4.8904969331742136e-05, "loss": 0.1675, "step": 700, "task_loss": 0.15347930788993835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7229146726902614, "compression_loss": 0.0, "distillation_loss": 0.15456578135490417, "epoch": 0.67, "learning_rate": 4.890184817695976e-05, "loss": 0.1522, "step": 701, "task_loss": 0.13044598698616028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7229446087393676, "compression_loss": 0.0, "distillation_loss": 0.18475860357284546, "epoch": 0.67, "learning_rate": 4.8898722680291564e-05, "loss": 0.1854, "step": 702, "task_loss": 0.19159327447414398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7229745370370371, "compression_loss": 0.0, "distillation_loss": 0.3150096535682678, "epoch": 0.67, "learning_rate": 4.8895592842305295e-05, "loss": 0.3036, "step": 703, "task_loss": 0.20053307712078094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7230044575842732, "compression_loss": 0.0, "distillation_loss": 0.3283127546310425, "epoch": 0.67, "learning_rate": 4.88924586635695e-05, "loss": 0.322, "step": 704, "task_loss": 0.2651616036891937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7230343703820801, "compression_loss": 0.0, "distillation_loss": 0.19642552733421326, "epoch": 0.67, "learning_rate": 4.888932014465352e-05, "loss": 0.1945, "step": 705, "task_loss": 0.1771748960018158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7230642754314611, "compression_loss": 0.0, "distillation_loss": 0.23262354731559753, "epoch": 0.67, "learning_rate": 4.888617728612749e-05, "loss": 0.2279, "step": 706, "task_loss": 0.18583974242210388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7230941727334201, "compression_loss": 0.0, "distillation_loss": 0.329001247882843, "epoch": 0.67, "learning_rate": 4.888303008856231e-05, "loss": 0.3181, "step": 707, "task_loss": 0.21996405720710754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7231240622889608, "compression_loss": 0.0, "distillation_loss": 0.5461586713790894, "epoch": 0.67, "learning_rate": 4.88798785525297e-05, "loss": 0.5225, "step": 708, "task_loss": 0.3095916509628296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7231539440990866, "compression_loss": 0.0, "distillation_loss": 0.29736411571502686, "epoch": 0.67, "learning_rate": 4.887672267860214e-05, "loss": 0.2917, "step": 709, "task_loss": 0.2409740537405014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7231838181648014, "compression_loss": 0.0, "distillation_loss": 0.33324652910232544, "epoch": 0.67, "learning_rate": 4.887356246735292e-05, "loss": 0.3262, "step": 710, "task_loss": 0.2625032365322113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7232136844871089, "compression_loss": 0.0, "distillation_loss": 0.3226258158683777, "epoch": 0.68, "learning_rate": 4.8870397919356094e-05, "loss": 0.3109, "step": 711, "task_loss": 0.20536869764328003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7232435430670128, "compression_loss": 0.0, "distillation_loss": 0.3054695427417755, "epoch": 0.68, "learning_rate": 4.8867229035186526e-05, "loss": 0.3009, "step": 712, "task_loss": 0.25971150398254395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7232733939055166, "compression_loss": 0.0, "distillation_loss": 0.2144642472267151, "epoch": 0.68, "learning_rate": 4.886405581541986e-05, "loss": 0.2068, "step": 713, "task_loss": 0.13790678977966309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7233032370036241, "compression_loss": 0.0, "distillation_loss": 0.193600594997406, "epoch": 0.68, "learning_rate": 4.886087826063252e-05, "loss": 0.1853, "step": 714, "task_loss": 0.11010687053203583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.723333072362339, "compression_loss": 0.0, "distillation_loss": 0.3077680170536041, "epoch": 0.68, "learning_rate": 4.8857696371401735e-05, "loss": 0.2948, "step": 715, "task_loss": 0.1778283566236496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.723362899982665, "compression_loss": 0.0, "distillation_loss": 0.24543452262878418, "epoch": 0.68, "learning_rate": 4.88545101483055e-05, "loss": 0.2343, "step": 716, "task_loss": 0.13451889157295227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7233927198656058, "compression_loss": 0.0, "distillation_loss": 0.4300387501716614, "epoch": 0.68, "learning_rate": 4.885131959192262e-05, "loss": 0.412, "step": 717, "task_loss": 0.24990172684192657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7234225320121649, "compression_loss": 0.0, "distillation_loss": 0.368339866399765, "epoch": 0.68, "learning_rate": 4.884812470283265e-05, "loss": 0.3664, "step": 718, "task_loss": 0.348560631275177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7234523364233462, "compression_loss": 0.0, "distillation_loss": 0.18903806805610657, "epoch": 0.68, "learning_rate": 4.884492548161599e-05, "loss": 0.186, "step": 719, "task_loss": 0.1587422788143158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7234821331001532, "compression_loss": 0.0, "distillation_loss": 0.2814609110355377, "epoch": 0.68, "learning_rate": 4.8841721928853776e-05, "loss": 0.2723, "step": 720, "task_loss": 0.19001665711402893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7235119220435898, "compression_loss": 0.0, "distillation_loss": 0.4020082950592041, "epoch": 0.68, "learning_rate": 4.8838514045127945e-05, "loss": 0.385, "step": 721, "task_loss": 0.23192302882671356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7235417032546595, "compression_loss": 0.0, "distillation_loss": 0.3017335832118988, "epoch": 0.69, "learning_rate": 4.883530183102123e-05, "loss": 0.29, "step": 722, "task_loss": 0.1840285062789917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.723571476734366, "compression_loss": 0.0, "distillation_loss": 0.27277618646621704, "epoch": 0.69, "learning_rate": 4.883208528711715e-05, "loss": 0.2582, "step": 723, "task_loss": 0.1269429624080658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.723601242483713, "compression_loss": 0.0, "distillation_loss": 0.2977296710014343, "epoch": 0.69, "learning_rate": 4.8828864413999995e-05, "loss": 0.2751, "step": 724, "task_loss": 0.07180985063314438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7236310005037043, "compression_loss": 0.0, "distillation_loss": 0.08697059750556946, "epoch": 0.69, "learning_rate": 4.8825639212254865e-05, "loss": 0.0934, "step": 725, "task_loss": 0.15161065757274628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7236607507953434, "compression_loss": 0.0, "distillation_loss": 0.190194234251976, "epoch": 0.69, "learning_rate": 4.882240968246762e-05, "loss": 0.1799, "step": 726, "task_loss": 0.087301105260849 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7236904933596341, "compression_loss": 0.0, "distillation_loss": 0.22055265307426453, "epoch": 0.69, "learning_rate": 4.8819175825224925e-05, "loss": 0.2109, "step": 727, "task_loss": 0.12390641123056412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7237202281975801, "compression_loss": 0.0, "distillation_loss": 0.18130475282669067, "epoch": 0.69, "learning_rate": 4.881593764111424e-05, "loss": 0.1789, "step": 728, "task_loss": 0.15703324973583221 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7237499553101849, "compression_loss": 0.0, "distillation_loss": 0.21481934189796448, "epoch": 0.69, "learning_rate": 4.8812695130723775e-05, "loss": 0.2068, "step": 729, "task_loss": 0.13483870029449463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7237796746984524, "compression_loss": 0.0, "distillation_loss": 0.2288239449262619, "epoch": 0.69, "learning_rate": 4.880944829464256e-05, "loss": 0.2152, "step": 730, "task_loss": 0.09266623109579086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7238093863633862, "compression_loss": 0.0, "distillation_loss": 0.43135643005371094, "epoch": 0.69, "learning_rate": 4.880619713346039e-05, "loss": 0.4258, "step": 731, "task_loss": 0.37580642104148865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7238390903059899, "compression_loss": 0.0, "distillation_loss": 0.5182836055755615, "epoch": 0.7, "learning_rate": 4.8802941647767856e-05, "loss": 0.4974, "step": 732, "task_loss": 0.3091394901275635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7238687865272674, "compression_loss": 0.0, "distillation_loss": 0.33886805176734924, "epoch": 0.7, "learning_rate": 4.879968183815634e-05, "loss": 0.3219, "step": 733, "task_loss": 0.16887077689170837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7238984750282221, "compression_loss": 0.0, "distillation_loss": 0.16470275819301605, "epoch": 0.7, "learning_rate": 4.8796417705217994e-05, "loss": 0.1544, "step": 734, "task_loss": 0.06122714653611183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7239281558098579, "compression_loss": 0.0, "distillation_loss": 0.3576793074607849, "epoch": 0.7, "learning_rate": 4.879314924954577e-05, "loss": 0.3392, "step": 735, "task_loss": 0.1729682832956314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7239578288731784, "compression_loss": 0.0, "distillation_loss": 0.1777103692293167, "epoch": 0.7, "learning_rate": 4.87898764717334e-05, "loss": 0.1658, "step": 736, "task_loss": 0.0585070438683033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7239874942191873, "compression_loss": 0.0, "distillation_loss": 0.21319881081581116, "epoch": 0.7, "learning_rate": 4.8786599372375384e-05, "loss": 0.2003, "step": 737, "task_loss": 0.08430507779121399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7240171518488882, "compression_loss": 0.0, "distillation_loss": 0.4297768473625183, "epoch": 0.7, "learning_rate": 4.878331795206705e-05, "loss": 0.4102, "step": 738, "task_loss": 0.23426848649978638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7240468017632848, "compression_loss": 0.0, "distillation_loss": 0.44237983226776123, "epoch": 0.7, "learning_rate": 4.878003221140446e-05, "loss": 0.4247, "step": 739, "task_loss": 0.2660304307937622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.724076443963381, "compression_loss": 0.0, "distillation_loss": 0.47253918647766113, "epoch": 0.7, "learning_rate": 4.877674215098449e-05, "loss": 0.4542, "step": 740, "task_loss": 0.28894340991973877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7241060784501803, "compression_loss": 0.0, "distillation_loss": 0.3578449487686157, "epoch": 0.7, "learning_rate": 4.87734477714048e-05, "loss": 0.3449, "step": 741, "task_loss": 0.22871339321136475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7241357052246863, "compression_loss": 0.0, "distillation_loss": 0.41139233112335205, "epoch": 0.7, "learning_rate": 4.8770149073263833e-05, "loss": 0.3942, "step": 742, "task_loss": 0.23949165642261505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7241653242879028, "compression_loss": 0.0, "distillation_loss": 0.24485422670841217, "epoch": 0.71, "learning_rate": 4.87668460571608e-05, "loss": 0.2349, "step": 743, "task_loss": 0.1458071619272232 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7241949356408335, "compression_loss": 0.0, "distillation_loss": 0.2777606248855591, "epoch": 0.71, "learning_rate": 4.8763538723695726e-05, "loss": 0.2744, "step": 744, "task_loss": 0.2439776211977005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7242245392844819, "compression_loss": 0.0, "distillation_loss": 0.4678876996040344, "epoch": 0.71, "learning_rate": 4.87602270734694e-05, "loss": 0.444, "step": 745, "task_loss": 0.22880345582962036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7242541352198519, "compression_loss": 0.0, "distillation_loss": 0.5279219150543213, "epoch": 0.71, "learning_rate": 4.8756911107083387e-05, "loss": 0.5044, "step": 746, "task_loss": 0.2927531599998474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7242837234479472, "compression_loss": 0.0, "distillation_loss": 0.35156458616256714, "epoch": 0.71, "learning_rate": 4.875359082514006e-05, "loss": 0.3392, "step": 747, "task_loss": 0.2275211364030838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7243133039697713, "compression_loss": 0.0, "distillation_loss": 0.18168729543685913, "epoch": 0.71, "learning_rate": 4.8750266228242555e-05, "loss": 0.1755, "step": 748, "task_loss": 0.11939448118209839 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7243428767863281, "compression_loss": 0.0, "distillation_loss": 0.3357431888580322, "epoch": 0.71, "learning_rate": 4.874693731699481e-05, "loss": 0.3205, "step": 749, "task_loss": 0.18339495360851288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.724372441898621, "compression_loss": 0.0, "distillation_loss": 0.2332255244255066, "epoch": 0.71, "learning_rate": 4.8743604092001544e-05, "loss": 0.2189, "step": 750, "task_loss": 0.08988796174526215 }, { "epoch": 0.71, "eval_accuracy": 0.8990825688073395, "eval_loss": 0.3525860905647278, "eval_runtime": 18.2739, "eval_samples_per_second": 47.718, "eval_steps_per_second": 5.965, "step": 750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.724401999307654, "compression_loss": 0.0, "distillation_loss": 0.23790621757507324, "epoch": 0.71, "learning_rate": 4.8740266553868236e-05, "loss": 0.2297, "step": 751, "task_loss": 0.15616914629936218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7244315490144305, "compression_loss": 0.0, "distillation_loss": 0.1910121589899063, "epoch": 0.71, "learning_rate": 4.873692470320117e-05, "loss": 0.1767, "step": 752, "task_loss": 0.04741794615983963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7244610910199543, "compression_loss": 0.0, "distillation_loss": 0.6017439961433411, "epoch": 0.72, "learning_rate": 4.8733578540607425e-05, "loss": 0.587, "step": 753, "task_loss": 0.45440027117729187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7244906253252291, "compression_loss": 0.0, "distillation_loss": 0.1898229718208313, "epoch": 0.72, "learning_rate": 4.8730228066694825e-05, "loss": 0.1778, "step": 754, "task_loss": 0.06922735273838043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7245201519312585, "compression_loss": 0.0, "distillation_loss": 0.1818356066942215, "epoch": 0.72, "learning_rate": 4.872687328207202e-05, "loss": 0.1712, "step": 755, "task_loss": 0.07538501918315887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7245496708390464, "compression_loss": 0.0, "distillation_loss": 0.3183574080467224, "epoch": 0.72, "learning_rate": 4.872351418734841e-05, "loss": 0.3017, "step": 756, "task_loss": 0.15180720388889313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7245791820495963, "compression_loss": 0.0, "distillation_loss": 0.35951346158981323, "epoch": 0.72, "learning_rate": 4.8720150783134196e-05, "loss": 0.3478, "step": 757, "task_loss": 0.2423841804265976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7246086855639118, "compression_loss": 0.0, "distillation_loss": 0.18601301312446594, "epoch": 0.72, "learning_rate": 4.871678307004035e-05, "loss": 0.1736, "step": 758, "task_loss": 0.06224619597196579 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7246381813829967, "compression_loss": 0.0, "distillation_loss": 0.3574899733066559, "epoch": 0.72, "learning_rate": 4.8713411048678635e-05, "loss": 0.3498, "step": 759, "task_loss": 0.280869722366333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7246676695078548, "compression_loss": 0.0, "distillation_loss": 0.4299178719520569, "epoch": 0.72, "learning_rate": 4.8710034719661614e-05, "loss": 0.4142, "step": 760, "task_loss": 0.2722673714160919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7246971499394895, "compression_loss": 0.0, "distillation_loss": 0.2085331380367279, "epoch": 0.72, "learning_rate": 4.870665408360258e-05, "loss": 0.1979, "step": 761, "task_loss": 0.10241978615522385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7247266226789048, "compression_loss": 0.0, "distillation_loss": 0.44271695613861084, "epoch": 0.72, "learning_rate": 4.870326914111567e-05, "loss": 0.424, "step": 762, "task_loss": 0.2551548182964325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7247560877271041, "compression_loss": 0.0, "distillation_loss": 0.36846601963043213, "epoch": 0.72, "learning_rate": 4.8699879892815756e-05, "loss": 0.3636, "step": 763, "task_loss": 0.31974709033966064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7247855450850912, "compression_loss": 0.0, "distillation_loss": 0.3941296935081482, "epoch": 0.73, "learning_rate": 4.8696486339318524e-05, "loss": 0.3761, "step": 764, "task_loss": 0.21385695040225983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7248149947538698, "compression_loss": 0.0, "distillation_loss": 0.26239490509033203, "epoch": 0.73, "learning_rate": 4.8693088481240424e-05, "loss": 0.2546, "step": 765, "task_loss": 0.18409638106822968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7248444367344437, "compression_loss": 0.0, "distillation_loss": 0.1657324880361557, "epoch": 0.73, "learning_rate": 4.86896863191987e-05, "loss": 0.156, "step": 766, "task_loss": 0.06812982261180878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7248738710278163, "compression_loss": 0.0, "distillation_loss": 0.22085291147232056, "epoch": 0.73, "learning_rate": 4.8686279853811356e-05, "loss": 0.2073, "step": 767, "task_loss": 0.08557181805372238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7249032976349914, "compression_loss": 0.0, "distillation_loss": 0.06682948768138885, "epoch": 0.73, "learning_rate": 4.8682869085697206e-05, "loss": 0.0704, "step": 768, "task_loss": 0.10302991420030594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7249327165569729, "compression_loss": 0.0, "distillation_loss": 0.22944039106369019, "epoch": 0.73, "learning_rate": 4.8679454015475835e-05, "loss": 0.2132, "step": 769, "task_loss": 0.06714411824941635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7249621277947641, "compression_loss": 0.0, "distillation_loss": 0.5438629388809204, "epoch": 0.73, "learning_rate": 4.867603464376759e-05, "loss": 0.5187, "step": 770, "task_loss": 0.291761577129364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.724991531349369, "compression_loss": 0.0, "distillation_loss": 0.13500595092773438, "epoch": 0.73, "learning_rate": 4.867261097119363e-05, "loss": 0.1268, "step": 771, "task_loss": 0.0527961365878582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7250209272217911, "compression_loss": 0.0, "distillation_loss": 0.2532868981361389, "epoch": 0.73, "learning_rate": 4.8669182998375884e-05, "loss": 0.2471, "step": 772, "task_loss": 0.19150203466415405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7250503154130341, "compression_loss": 0.0, "distillation_loss": 0.1466882824897766, "epoch": 0.73, "learning_rate": 4.8665750725937045e-05, "loss": 0.138, "step": 773, "task_loss": 0.059596575796604156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7250796959241018, "compression_loss": 0.0, "distillation_loss": 0.19294121861457825, "epoch": 0.74, "learning_rate": 4.866231415450062e-05, "loss": 0.1905, "step": 774, "task_loss": 0.16841061413288116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7251090687559978, "compression_loss": 0.0, "distillation_loss": 0.21601201593875885, "epoch": 0.74, "learning_rate": 4.8658873284690866e-05, "loss": 0.2109, "step": 775, "task_loss": 0.16467759013175964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7251384339097258, "compression_loss": 0.0, "distillation_loss": 0.2825254797935486, "epoch": 0.74, "learning_rate": 4.865542811713284e-05, "loss": 0.283, "step": 776, "task_loss": 0.28761669993400574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7251677913862895, "compression_loss": 0.0, "distillation_loss": 0.17407667636871338, "epoch": 0.74, "learning_rate": 4.865197865245237e-05, "loss": 0.1644, "step": 777, "task_loss": 0.07734289765357971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7251971411866924, "compression_loss": 0.0, "distillation_loss": 0.1487307846546173, "epoch": 0.74, "learning_rate": 4.8648524891276066e-05, "loss": 0.1402, "step": 778, "task_loss": 0.06377163529396057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7252264833119385, "compression_loss": 0.0, "distillation_loss": 0.6334168910980225, "epoch": 0.74, "learning_rate": 4.8645066834231325e-05, "loss": 0.616, "step": 779, "task_loss": 0.4591747522354126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7252558177630313, "compression_loss": 0.0, "distillation_loss": 0.3129650950431824, "epoch": 0.74, "learning_rate": 4.8641604481946314e-05, "loss": 0.2944, "step": 780, "task_loss": 0.12727110087871552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7252851445409744, "compression_loss": 0.0, "distillation_loss": 0.5313491821289062, "epoch": 0.74, "learning_rate": 4.863813783504999e-05, "loss": 0.5238, "step": 781, "task_loss": 0.4554893374443054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7253144636467717, "compression_loss": 0.0, "distillation_loss": 0.16231770813465118, "epoch": 0.74, "learning_rate": 4.863466689417209e-05, "loss": 0.1523, "step": 782, "task_loss": 0.062042634934186935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7253437750814267, "compression_loss": 0.0, "distillation_loss": 0.20974399149417877, "epoch": 0.74, "learning_rate": 4.863119165994312e-05, "loss": 0.2063, "step": 783, "task_loss": 0.1748623102903366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7253730788459432, "compression_loss": 0.0, "distillation_loss": 0.2711687684059143, "epoch": 0.74, "learning_rate": 4.862771213299438e-05, "loss": 0.2706, "step": 784, "task_loss": 0.26576724648475647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7254023749413246, "compression_loss": 0.0, "distillation_loss": 0.2579856514930725, "epoch": 0.75, "learning_rate": 4.8624228313957937e-05, "loss": 0.2482, "step": 785, "task_loss": 0.1598002314567566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.725431663368575, "compression_loss": 0.0, "distillation_loss": 0.2370847761631012, "epoch": 0.75, "learning_rate": 4.862074020346664e-05, "loss": 0.2268, "step": 786, "task_loss": 0.1346910297870636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7254609441286979, "compression_loss": 0.0, "distillation_loss": 0.4882653057575226, "epoch": 0.75, "learning_rate": 4.8617247802154134e-05, "loss": 0.4718, "step": 787, "task_loss": 0.323346883058548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7254902172226969, "compression_loss": 0.0, "distillation_loss": 0.5949300527572632, "epoch": 0.75, "learning_rate": 4.861375111065482e-05, "loss": 0.5811, "step": 788, "task_loss": 0.4562370777130127 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7255194826515758, "compression_loss": 0.0, "distillation_loss": 0.5340683460235596, "epoch": 0.75, "learning_rate": 4.861025012960389e-05, "loss": 0.5078, "step": 789, "task_loss": 0.27188482880592346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7255487404163383, "compression_loss": 0.0, "distillation_loss": 0.23920854926109314, "epoch": 0.75, "learning_rate": 4.8606744859637316e-05, "loss": 0.2273, "step": 790, "task_loss": 0.11962777376174927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.725577990517988, "compression_loss": 0.0, "distillation_loss": 0.25565600395202637, "epoch": 0.75, "learning_rate": 4.8603235301391844e-05, "loss": 0.2569, "step": 791, "task_loss": 0.26804494857788086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7256072329575285, "compression_loss": 0.0, "distillation_loss": 0.2217859923839569, "epoch": 0.75, "learning_rate": 4.859972145550501e-05, "loss": 0.2133, "step": 792, "task_loss": 0.13717922568321228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7256364677359637, "compression_loss": 0.0, "distillation_loss": 0.24465468525886536, "epoch": 0.75, "learning_rate": 4.859620332261512e-05, "loss": 0.2358, "step": 793, "task_loss": 0.15646812319755554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7256656948542971, "compression_loss": 0.0, "distillation_loss": 0.23842063546180725, "epoch": 0.75, "learning_rate": 4.8592680903361247e-05, "loss": 0.2273, "step": 794, "task_loss": 0.12698203325271606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7256949143135324, "compression_loss": 0.0, "distillation_loss": 0.26218464970588684, "epoch": 0.75, "learning_rate": 4.858915419838327e-05, "loss": 0.2558, "step": 795, "task_loss": 0.19839191436767578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7257241261146734, "compression_loss": 0.0, "distillation_loss": 0.5854094624519348, "epoch": 0.76, "learning_rate": 4.8585623208321825e-05, "loss": 0.5535, "step": 796, "task_loss": 0.2661326229572296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7257533302587237, "compression_loss": 0.0, "distillation_loss": 0.18930557370185852, "epoch": 0.76, "learning_rate": 4.858208793381833e-05, "loss": 0.1816, "step": 797, "task_loss": 0.11239316314458847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.725782526746687, "compression_loss": 0.0, "distillation_loss": 0.35272884368896484, "epoch": 0.76, "learning_rate": 4.8578548375514995e-05, "loss": 0.3343, "step": 798, "task_loss": 0.16825662553310394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7258117155795669, "compression_loss": 0.0, "distillation_loss": 0.6028088331222534, "epoch": 0.76, "learning_rate": 4.8575004534054794e-05, "loss": 0.5743, "step": 799, "task_loss": 0.31754568219184875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7258408967583673, "compression_loss": 0.0, "distillation_loss": 0.16018742322921753, "epoch": 0.76, "learning_rate": 4.8571456410081474e-05, "loss": 0.1666, "step": 800, "task_loss": 0.22434723377227783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7258700702840917, "compression_loss": 0.0, "distillation_loss": 0.346615195274353, "epoch": 0.76, "learning_rate": 4.856790400423958e-05, "loss": 0.3398, "step": 801, "task_loss": 0.2779731750488281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7258992361577438, "compression_loss": 0.0, "distillation_loss": 0.40023940801620483, "epoch": 0.76, "learning_rate": 4.856434731717442e-05, "loss": 0.388, "step": 802, "task_loss": 0.27803096175193787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7259283943803273, "compression_loss": 0.0, "distillation_loss": 0.213174507021904, "epoch": 0.76, "learning_rate": 4.8560786349532075e-05, "loss": 0.199, "step": 803, "task_loss": 0.07103019952774048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7259575449528458, "compression_loss": 0.0, "distillation_loss": 0.15435612201690674, "epoch": 0.76, "learning_rate": 4.855722110195943e-05, "loss": 0.145, "step": 804, "task_loss": 0.06065506860613823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7259866878763032, "compression_loss": 0.0, "distillation_loss": 0.20377478003501892, "epoch": 0.76, "learning_rate": 4.8553651575104114e-05, "loss": 0.1985, "step": 805, "task_loss": 0.15101395547389984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.726015823151703, "compression_loss": 0.0, "distillation_loss": 0.3351978063583374, "epoch": 0.77, "learning_rate": 4.8550077769614554e-05, "loss": 0.3193, "step": 806, "task_loss": 0.17666833102703094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7260449507800489, "compression_loss": 0.0, "distillation_loss": 0.2128250151872635, "epoch": 0.77, "learning_rate": 4.8546499686139944e-05, "loss": 0.1992, "step": 807, "task_loss": 0.07669594138860703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7260740707623446, "compression_loss": 0.0, "distillation_loss": 0.22992324829101562, "epoch": 0.77, "learning_rate": 4.854291732533027e-05, "loss": 0.2204, "step": 808, "task_loss": 0.13490459322929382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.726103183099594, "compression_loss": 0.0, "distillation_loss": 0.1448119878768921, "epoch": 0.77, "learning_rate": 4.853933068783628e-05, "loss": 0.1417, "step": 809, "task_loss": 0.11348484456539154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7261322877928004, "compression_loss": 0.0, "distillation_loss": 0.16505393385887146, "epoch": 0.77, "learning_rate": 4.853573977430951e-05, "loss": 0.1539, "step": 810, "task_loss": 0.053132861852645874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7261613848429677, "compression_loss": 0.0, "distillation_loss": 0.15083172917366028, "epoch": 0.77, "learning_rate": 4.8532144585402254e-05, "loss": 0.1491, "step": 811, "task_loss": 0.13310600817203522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7261904742510995, "compression_loss": 0.0, "distillation_loss": 0.29990440607070923, "epoch": 0.77, "learning_rate": 4.85285451217676e-05, "loss": 0.2841, "step": 812, "task_loss": 0.1421201378107071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7262195560181995, "compression_loss": 0.0, "distillation_loss": 0.20483794808387756, "epoch": 0.77, "learning_rate": 4.8524941384059415e-05, "loss": 0.1949, "step": 813, "task_loss": 0.10570985078811646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7262486301452715, "compression_loss": 0.0, "distillation_loss": 0.152552530169487, "epoch": 0.77, "learning_rate": 4.8521333372932326e-05, "loss": 0.1494, "step": 814, "task_loss": 0.12140922248363495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7262776966333191, "compression_loss": 0.0, "distillation_loss": 0.2609613537788391, "epoch": 0.77, "learning_rate": 4.851772108904175e-05, "loss": 0.2615, "step": 815, "task_loss": 0.2658497095108032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7263067554833459, "compression_loss": 0.0, "distillation_loss": 0.07736967504024506, "epoch": 0.77, "learning_rate": 4.851410453304388e-05, "loss": 0.0738, "step": 816, "task_loss": 0.041510533541440964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7263358066963557, "compression_loss": 0.0, "distillation_loss": 0.17229236662387848, "epoch": 0.78, "learning_rate": 4.851048370559567e-05, "loss": 0.1659, "step": 817, "task_loss": 0.10811686515808105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7263648502733522, "compression_loss": 0.0, "distillation_loss": 0.15134498476982117, "epoch": 0.78, "learning_rate": 4.850685860735487e-05, "loss": 0.1597, "step": 818, "task_loss": 0.23519474267959595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7263938862153388, "compression_loss": 0.0, "distillation_loss": 0.42961549758911133, "epoch": 0.78, "learning_rate": 4.850322923898e-05, "loss": 0.408, "step": 819, "task_loss": 0.21310321986675262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7264229145233196, "compression_loss": 0.0, "distillation_loss": 0.15049859881401062, "epoch": 0.78, "learning_rate": 4.8499595601130337e-05, "loss": 0.1421, "step": 820, "task_loss": 0.06645572930574417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.726451935198298, "compression_loss": 0.0, "distillation_loss": 0.1578487902879715, "epoch": 0.78, "learning_rate": 4.849595769446596e-05, "loss": 0.1482, "step": 821, "task_loss": 0.06112413853406906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7264809482412778, "compression_loss": 0.0, "distillation_loss": 0.26902398467063904, "epoch": 0.78, "learning_rate": 4.849231551964771e-05, "loss": 0.2534, "step": 822, "task_loss": 0.1128850132226944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7265099536532627, "compression_loss": 0.0, "distillation_loss": 0.2599482536315918, "epoch": 0.78, "learning_rate": 4.848866907733721e-05, "loss": 0.2607, "step": 823, "task_loss": 0.26737767457962036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7265389514352562, "compression_loss": 0.0, "distillation_loss": 0.4169207513332367, "epoch": 0.78, "learning_rate": 4.848501836819684e-05, "loss": 0.4028, "step": 824, "task_loss": 0.2758685052394867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7265679415882622, "compression_loss": 0.0, "distillation_loss": 0.09598681330680847, "epoch": 0.78, "learning_rate": 4.848136339288979e-05, "loss": 0.0934, "step": 825, "task_loss": 0.06983894854784012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7265969241132842, "compression_loss": 0.0, "distillation_loss": 0.7514071464538574, "epoch": 0.78, "learning_rate": 4.8477704152079984e-05, "loss": 0.7297, "step": 826, "task_loss": 0.5340147614479065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7266258990113261, "compression_loss": 0.0, "distillation_loss": 0.41649430990219116, "epoch": 0.79, "learning_rate": 4.8474040646432153e-05, "loss": 0.4065, "step": 827, "task_loss": 0.31665799021720886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7266548662833914, "compression_loss": 0.0, "distillation_loss": 0.2038126438856125, "epoch": 0.79, "learning_rate": 4.8470372876611784e-05, "loss": 0.1918, "step": 828, "task_loss": 0.08404532074928284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7266838259304838, "compression_loss": 0.0, "distillation_loss": 0.09588038921356201, "epoch": 0.79, "learning_rate": 4.846670084328515e-05, "loss": 0.109, "step": 829, "task_loss": 0.22751304507255554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7267127779536071, "compression_loss": 0.0, "distillation_loss": 0.44214335083961487, "epoch": 0.79, "learning_rate": 4.846302454711929e-05, "loss": 0.4217, "step": 830, "task_loss": 0.23792800307273865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7267417223537649, "compression_loss": 0.0, "distillation_loss": 0.23889368772506714, "epoch": 0.79, "learning_rate": 4.845934398878202e-05, "loss": 0.2285, "step": 831, "task_loss": 0.13461318612098694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7267706591319608, "compression_loss": 0.0, "distillation_loss": 0.23278114199638367, "epoch": 0.79, "learning_rate": 4.845565916894193e-05, "loss": 0.2227, "step": 832, "task_loss": 0.13188397884368896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7267995882891986, "compression_loss": 0.0, "distillation_loss": 0.3519028425216675, "epoch": 0.79, "learning_rate": 4.8451970088268396e-05, "loss": 0.344, "step": 833, "task_loss": 0.27317169308662415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.726828509826482, "compression_loss": 0.0, "distillation_loss": 0.19134777784347534, "epoch": 0.79, "learning_rate": 4.8448276747431545e-05, "loss": 0.1843, "step": 834, "task_loss": 0.12132743000984192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7268574237448145, "compression_loss": 0.0, "distillation_loss": 0.5469926595687866, "epoch": 0.79, "learning_rate": 4.84445791471023e-05, "loss": 0.5193, "step": 835, "task_loss": 0.27034249901771545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7268863300452001, "compression_loss": 0.0, "distillation_loss": 0.22652700543403625, "epoch": 0.79, "learning_rate": 4.8440877287952336e-05, "loss": 0.2205, "step": 836, "task_loss": 0.16659414768218994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7269152287286422, "compression_loss": 0.0, "distillation_loss": 0.1271471381187439, "epoch": 0.79, "learning_rate": 4.8437171170654125e-05, "loss": 0.1293, "step": 837, "task_loss": 0.1484624445438385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7269441197961446, "compression_loss": 0.0, "distillation_loss": 0.2708894908428192, "epoch": 0.8, "learning_rate": 4.843346079588089e-05, "loss": 0.257, "step": 838, "task_loss": 0.132267564535141 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7269730032487108, "compression_loss": 0.0, "distillation_loss": 0.3023916482925415, "epoch": 0.8, "learning_rate": 4.842974616430665e-05, "loss": 0.2919, "step": 839, "task_loss": 0.197329580783844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7270018790873449, "compression_loss": 0.0, "distillation_loss": 0.30287623405456543, "epoch": 0.8, "learning_rate": 4.842602727660618e-05, "loss": 0.3182, "step": 840, "task_loss": 0.4560437798500061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7270307473130502, "compression_loss": 0.0, "distillation_loss": 0.23289792239665985, "epoch": 0.8, "learning_rate": 4.842230413345503e-05, "loss": 0.2203, "step": 841, "task_loss": 0.10670986026525497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7270596079268306, "compression_loss": 0.0, "distillation_loss": 0.1355813890695572, "epoch": 0.8, "learning_rate": 4.8418576735529535e-05, "loss": 0.1275, "step": 842, "task_loss": 0.054461102932691574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7270884609296895, "compression_loss": 0.0, "distillation_loss": 0.20015108585357666, "epoch": 0.8, "learning_rate": 4.841484508350679e-05, "loss": 0.194, "step": 843, "task_loss": 0.13830646872520447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7271173063226308, "compression_loss": 0.0, "distillation_loss": 0.2658821940422058, "epoch": 0.8, "learning_rate": 4.841110917806467e-05, "loss": 0.2551, "step": 844, "task_loss": 0.15778332948684692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7271461441066583, "compression_loss": 0.0, "distillation_loss": 0.35619670152664185, "epoch": 0.8, "learning_rate": 4.840736901988182e-05, "loss": 0.3489, "step": 845, "task_loss": 0.2832150459289551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7271749742827754, "compression_loss": 0.0, "distillation_loss": 0.35258805751800537, "epoch": 0.8, "learning_rate": 4.840362460963765e-05, "loss": 0.3371, "step": 846, "task_loss": 0.19759327173233032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7272037968519861, "compression_loss": 0.0, "distillation_loss": 0.27292773127555847, "epoch": 0.8, "learning_rate": 4.8399875948012355e-05, "loss": 0.2561, "step": 847, "task_loss": 0.10433943569660187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7272326118152938, "compression_loss": 0.0, "distillation_loss": 0.44316327571868896, "epoch": 0.81, "learning_rate": 4.8396123035686906e-05, "loss": 0.4312, "step": 848, "task_loss": 0.323985755443573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7272614191737022, "compression_loss": 0.0, "distillation_loss": 0.3451952636241913, "epoch": 0.81, "learning_rate": 4.839236587334303e-05, "loss": 0.3296, "step": 849, "task_loss": 0.1894971877336502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7272902189282151, "compression_loss": 0.0, "distillation_loss": 0.3476291000843048, "epoch": 0.81, "learning_rate": 4.8388604461663236e-05, "loss": 0.3311, "step": 850, "task_loss": 0.18212732672691345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7273190110798362, "compression_loss": 0.0, "distillation_loss": 0.19594484567642212, "epoch": 0.81, "learning_rate": 4.838483880133079e-05, "loss": 0.1966, "step": 851, "task_loss": 0.20298801362514496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7273477956295691, "compression_loss": 0.0, "distillation_loss": 0.2919715940952301, "epoch": 0.81, "learning_rate": 4.8381068893029766e-05, "loss": 0.2813, "step": 852, "task_loss": 0.18507151305675507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7273765725784175, "compression_loss": 0.0, "distillation_loss": 0.2512668967247009, "epoch": 0.81, "learning_rate": 4.837729473744497e-05, "loss": 0.2501, "step": 853, "task_loss": 0.2395791858434677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7274053419273852, "compression_loss": 0.0, "distillation_loss": 0.3293880224227905, "epoch": 0.81, "learning_rate": 4.8373516335261994e-05, "loss": 0.3175, "step": 854, "task_loss": 0.21075962483882904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7274341036774756, "compression_loss": 0.0, "distillation_loss": 0.30303874611854553, "epoch": 0.81, "learning_rate": 4.8369733687167204e-05, "loss": 0.2945, "step": 855, "task_loss": 0.2173597514629364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7274628578296926, "compression_loss": 0.0, "distillation_loss": 0.1690731942653656, "epoch": 0.81, "learning_rate": 4.836594679384775e-05, "loss": 0.1559, "step": 856, "task_loss": 0.03761624917387962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7274916043850399, "compression_loss": 0.0, "distillation_loss": 0.3940788507461548, "epoch": 0.81, "learning_rate": 4.836215565599152e-05, "loss": 0.383, "step": 857, "task_loss": 0.2832016944885254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7275203433445211, "compression_loss": 0.0, "distillation_loss": 0.49104076623916626, "epoch": 0.81, "learning_rate": 4.835836027428722e-05, "loss": 0.4723, "step": 858, "task_loss": 0.3033701777458191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7275490747091399, "compression_loss": 0.0, "distillation_loss": 0.34213539958000183, "epoch": 0.82, "learning_rate": 4.8354560649424264e-05, "loss": 0.3263, "step": 859, "task_loss": 0.18411041796207428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7275777984798999, "compression_loss": 0.0, "distillation_loss": 0.10528973489999771, "epoch": 0.82, "learning_rate": 4.8350756782092894e-05, "loss": 0.1246, "step": 860, "task_loss": 0.2985772490501404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7276065146578049, "compression_loss": 0.0, "distillation_loss": 0.28886890411376953, "epoch": 0.82, "learning_rate": 4.8346948672984096e-05, "loss": 0.2806, "step": 861, "task_loss": 0.20596402883529663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7276352232438587, "compression_loss": 0.0, "distillation_loss": 0.24927183985710144, "epoch": 0.82, "learning_rate": 4.8343136322789626e-05, "loss": 0.2412, "step": 862, "task_loss": 0.16882850229740143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7276639242390647, "compression_loss": 0.0, "distillation_loss": 0.48319730162620544, "epoch": 0.82, "learning_rate": 4.8339319732202024e-05, "loss": 0.4605, "step": 863, "task_loss": 0.2564105689525604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7276926176444267, "compression_loss": 0.0, "distillation_loss": 0.272256463766098, "epoch": 0.82, "learning_rate": 4.83354989019146e-05, "loss": 0.261, "step": 864, "task_loss": 0.16006284952163696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7277213034609484, "compression_loss": 0.0, "distillation_loss": 0.43346107006073, "epoch": 0.82, "learning_rate": 4.83316738326214e-05, "loss": 0.4201, "step": 865, "task_loss": 0.30030304193496704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7277499816896336, "compression_loss": 0.0, "distillation_loss": 0.2226218283176422, "epoch": 0.82, "learning_rate": 4.832784452501729e-05, "loss": 0.2106, "step": 866, "task_loss": 0.1027415320277214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7277786523314856, "compression_loss": 0.0, "distillation_loss": 0.4321751892566681, "epoch": 0.82, "learning_rate": 4.8324010979797875e-05, "loss": 0.4115, "step": 867, "task_loss": 0.22510364651679993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7278073153875085, "compression_loss": 0.0, "distillation_loss": 0.4015240967273712, "epoch": 0.82, "learning_rate": 4.8320173197659534e-05, "loss": 0.3924, "step": 868, "task_loss": 0.310598224401474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7278359708587059, "compression_loss": 0.0, "distillation_loss": 0.1309269368648529, "epoch": 0.83, "learning_rate": 4.831633117929942e-05, "loss": 0.1323, "step": 869, "task_loss": 0.1442694216966629 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7278646187460813, "compression_loss": 0.0, "distillation_loss": 0.34364479780197144, "epoch": 0.83, "learning_rate": 4.831248492541545e-05, "loss": 0.3322, "step": 870, "task_loss": 0.2290574610233307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7278932590506385, "compression_loss": 0.0, "distillation_loss": 0.4297097623348236, "epoch": 0.83, "learning_rate": 4.830863443670632e-05, "loss": 0.4357, "step": 871, "task_loss": 0.48918217420578003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7279218917733812, "compression_loss": 0.0, "distillation_loss": 0.3155050277709961, "epoch": 0.83, "learning_rate": 4.8304779713871495e-05, "loss": 0.3033, "step": 872, "task_loss": 0.19333837926387787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7279505169153131, "compression_loss": 0.0, "distillation_loss": 0.19998089969158173, "epoch": 0.83, "learning_rate": 4.83009207576112e-05, "loss": 0.1878, "step": 873, "task_loss": 0.07784054428339005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7279791344774377, "compression_loss": 0.0, "distillation_loss": 0.35912150144577026, "epoch": 0.83, "learning_rate": 4.829705756862642e-05, "loss": 0.3322, "step": 874, "task_loss": 0.09007196873426437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7280077444607589, "compression_loss": 0.0, "distillation_loss": 0.42145901918411255, "epoch": 0.83, "learning_rate": 4.829319014761894e-05, "loss": 0.4086, "step": 875, "task_loss": 0.29324570298194885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7280363468662803, "compression_loss": 0.0, "distillation_loss": 0.5021111369132996, "epoch": 0.83, "learning_rate": 4.828931849529129e-05, "loss": 0.4807, "step": 876, "task_loss": 0.28812965750694275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7280649416950056, "compression_loss": 0.0, "distillation_loss": 0.33752530813217163, "epoch": 0.83, "learning_rate": 4.8285442612346774e-05, "loss": 0.318, "step": 877, "task_loss": 0.14252959191799164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7280935289479383, "compression_loss": 0.0, "distillation_loss": 0.24429580569267273, "epoch": 0.83, "learning_rate": 4.828156249948946e-05, "loss": 0.2482, "step": 878, "task_loss": 0.2833808660507202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7281221086260824, "compression_loss": 0.0, "distillation_loss": 0.16154003143310547, "epoch": 0.83, "learning_rate": 4.827767815742419e-05, "loss": 0.1566, "step": 879, "task_loss": 0.11249750107526779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7281506807304414, "compression_loss": 0.0, "distillation_loss": 0.2341882586479187, "epoch": 0.84, "learning_rate": 4.8273789586856574e-05, "loss": 0.2249, "step": 880, "task_loss": 0.14081409573554993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.728179245262019, "compression_loss": 0.0, "distillation_loss": 0.22025887668132782, "epoch": 0.84, "learning_rate": 4.8269896788493e-05, "loss": 0.2063, "step": 881, "task_loss": 0.0802309438586235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7282078022218189, "compression_loss": 0.0, "distillation_loss": 0.2393018901348114, "epoch": 0.84, "learning_rate": 4.8265999763040603e-05, "loss": 0.2291, "step": 882, "task_loss": 0.13759064674377441 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7282363516108448, "compression_loss": 0.0, "distillation_loss": 0.22338096797466278, "epoch": 0.84, "learning_rate": 4.8262098511207295e-05, "loss": 0.2188, "step": 883, "task_loss": 0.17738208174705505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7282648934301004, "compression_loss": 0.0, "distillation_loss": 0.250945508480072, "epoch": 0.84, "learning_rate": 4.825819303370177e-05, "loss": 0.2357, "step": 884, "task_loss": 0.0986076220870018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7282934276805891, "compression_loss": 0.0, "distillation_loss": 0.32160013914108276, "epoch": 0.84, "learning_rate": 4.8254283331233464e-05, "loss": 0.3065, "step": 885, "task_loss": 0.1702209860086441 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.728321954363315, "compression_loss": 0.0, "distillation_loss": 0.13798242807388306, "epoch": 0.84, "learning_rate": 4.825036940451259e-05, "loss": 0.1329, "step": 886, "task_loss": 0.08686836808919907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7283504734792816, "compression_loss": 0.0, "distillation_loss": 0.28318336606025696, "epoch": 0.84, "learning_rate": 4.8246451254250145e-05, "loss": 0.2692, "step": 887, "task_loss": 0.14343897998332977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7283789850294926, "compression_loss": 0.0, "distillation_loss": 0.31278300285339355, "epoch": 0.84, "learning_rate": 4.8242528881157866e-05, "loss": 0.306, "step": 888, "task_loss": 0.24526852369308472 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7284074890149516, "compression_loss": 0.0, "distillation_loss": 0.5371044874191284, "epoch": 0.84, "learning_rate": 4.823860228594829e-05, "loss": 0.5197, "step": 889, "task_loss": 0.36329126358032227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7284359854366623, "compression_loss": 0.0, "distillation_loss": 0.19384345412254333, "epoch": 0.85, "learning_rate": 4.823467146933468e-05, "loss": 0.1807, "step": 890, "task_loss": 0.06265727430582047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7284644742956286, "compression_loss": 0.0, "distillation_loss": 0.37346696853637695, "epoch": 0.85, "learning_rate": 4.823073643203111e-05, "loss": 0.3685, "step": 891, "task_loss": 0.3242899179458618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7284929555928539, "compression_loss": 0.0, "distillation_loss": 0.4346903860569, "epoch": 0.85, "learning_rate": 4.822679717475237e-05, "loss": 0.4304, "step": 892, "task_loss": 0.39173489809036255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7285214293293419, "compression_loss": 0.0, "distillation_loss": 0.36973273754119873, "epoch": 0.85, "learning_rate": 4.8222853698214076e-05, "loss": 0.3513, "step": 893, "task_loss": 0.18498845398426056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7285498955060965, "compression_loss": 0.0, "distillation_loss": 0.3774262070655823, "epoch": 0.85, "learning_rate": 4.8218906003132555e-05, "loss": 0.3675, "step": 894, "task_loss": 0.278170645236969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7285783541241213, "compression_loss": 0.0, "distillation_loss": 0.43660831451416016, "epoch": 0.85, "learning_rate": 4.8214954090224946e-05, "loss": 0.4236, "step": 895, "task_loss": 0.3061750829219818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7286068051844199, "compression_loss": 0.0, "distillation_loss": 0.2611873149871826, "epoch": 0.85, "learning_rate": 4.8210997960209114e-05, "loss": 0.2515, "step": 896, "task_loss": 0.16423273086547852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7286352486879959, "compression_loss": 0.0, "distillation_loss": 0.2831340730190277, "epoch": 0.85, "learning_rate": 4.8207037613803715e-05, "loss": 0.2735, "step": 897, "task_loss": 0.18677663803100586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7286636846358533, "compression_loss": 0.0, "distillation_loss": 0.38769102096557617, "epoch": 0.85, "learning_rate": 4.820307305172818e-05, "loss": 0.3674, "step": 898, "task_loss": 0.1845768690109253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7286921130289954, "compression_loss": 0.0, "distillation_loss": 0.28008437156677246, "epoch": 0.85, "learning_rate": 4.8199104274702666e-05, "loss": 0.265, "step": 899, "task_loss": 0.1291656196117401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7287205338684262, "compression_loss": 0.0, "distillation_loss": 0.12043634802103043, "epoch": 0.85, "learning_rate": 4.819513128344814e-05, "loss": 0.1209, "step": 900, "task_loss": 0.12554529309272766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7287489471551492, "compression_loss": 0.0, "distillation_loss": 0.2257360816001892, "epoch": 0.86, "learning_rate": 4.8191154078686306e-05, "loss": 0.2282, "step": 901, "task_loss": 0.25028055906295776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7287773528901681, "compression_loss": 0.0, "distillation_loss": 0.2224644273519516, "epoch": 0.86, "learning_rate": 4.8187172661139636e-05, "loss": 0.2194, "step": 902, "task_loss": 0.19155338406562805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7288057510744866, "compression_loss": 0.0, "distillation_loss": 0.1225384920835495, "epoch": 0.86, "learning_rate": 4.818318703153139e-05, "loss": 0.1325, "step": 903, "task_loss": 0.2224058359861374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7288341417091085, "compression_loss": 0.0, "distillation_loss": 0.5352866649627686, "epoch": 0.86, "learning_rate": 4.817919719058557e-05, "loss": 0.528, "step": 904, "task_loss": 0.4623543620109558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7288625247950373, "compression_loss": 0.0, "distillation_loss": 0.2698153257369995, "epoch": 0.86, "learning_rate": 4.8175203139026934e-05, "loss": 0.2603, "step": 905, "task_loss": 0.17438843846321106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7288909003332769, "compression_loss": 0.0, "distillation_loss": 0.7470159530639648, "epoch": 0.86, "learning_rate": 4.817120487758104e-05, "loss": 0.712, "step": 906, "task_loss": 0.39666780829429626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7289192683248307, "compression_loss": 0.0, "distillation_loss": 0.22724510729312897, "epoch": 0.86, "learning_rate": 4.81672024069742e-05, "loss": 0.2208, "step": 907, "task_loss": 0.16269180178642273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7289476287707025, "compression_loss": 0.0, "distillation_loss": 0.3366519808769226, "epoch": 0.86, "learning_rate": 4.816319572793345e-05, "loss": 0.3336, "step": 908, "task_loss": 0.3060190975666046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.728975981671896, "compression_loss": 0.0, "distillation_loss": 0.1925545036792755, "epoch": 0.86, "learning_rate": 4.815918484118665e-05, "loss": 0.18, "step": 909, "task_loss": 0.0666121393442154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.729004327029415, "compression_loss": 0.0, "distillation_loss": 0.1643257886171341, "epoch": 0.86, "learning_rate": 4.815516974746239e-05, "loss": 0.151, "step": 910, "task_loss": 0.030893657356500626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.729032664844263, "compression_loss": 0.0, "distillation_loss": 0.20738086104393005, "epoch": 0.87, "learning_rate": 4.815115044749003e-05, "loss": 0.1996, "step": 911, "task_loss": 0.12986215949058533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7290609951174438, "compression_loss": 0.0, "distillation_loss": 0.2494029700756073, "epoch": 0.87, "learning_rate": 4.814712694199969e-05, "loss": 0.2375, "step": 912, "task_loss": 0.13066712021827698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.729089317849961, "compression_loss": 0.0, "distillation_loss": 0.24174784123897552, "epoch": 0.87, "learning_rate": 4.814309923172227e-05, "loss": 0.2271, "step": 913, "task_loss": 0.09568721055984497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7291176330428183, "compression_loss": 0.0, "distillation_loss": 0.4642338752746582, "epoch": 0.87, "learning_rate": 4.81390673173894e-05, "loss": 0.4392, "step": 914, "task_loss": 0.2140159010887146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7291459406970194, "compression_loss": 0.0, "distillation_loss": 0.28287458419799805, "epoch": 0.87, "learning_rate": 4.8135031199733524e-05, "loss": 0.2737, "step": 915, "task_loss": 0.19124047458171844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7291742408135681, "compression_loss": 0.0, "distillation_loss": 0.29212427139282227, "epoch": 0.87, "learning_rate": 4.813099087948781e-05, "loss": 0.281, "step": 916, "task_loss": 0.18083734810352325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7292025333934679, "compression_loss": 0.0, "distillation_loss": 0.2220340222120285, "epoch": 0.87, "learning_rate": 4.812694635738621e-05, "loss": 0.2106, "step": 917, "task_loss": 0.10801569372415543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7292308184377224, "compression_loss": 0.0, "distillation_loss": 0.1814860701560974, "epoch": 0.87, "learning_rate": 4.812289763416341e-05, "loss": 0.1735, "step": 918, "task_loss": 0.10150802880525589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7292590959473356, "compression_loss": 0.0, "distillation_loss": 0.1173454150557518, "epoch": 0.87, "learning_rate": 4.81188447105549e-05, "loss": 0.11, "step": 919, "task_loss": 0.0437774695456028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7292873659233109, "compression_loss": 0.0, "distillation_loss": 0.28012198209762573, "epoch": 0.87, "learning_rate": 4.811478758729691e-05, "loss": 0.2636, "step": 920, "task_loss": 0.11441508680582047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7293156283666522, "compression_loss": 0.0, "distillation_loss": 0.29111599922180176, "epoch": 0.87, "learning_rate": 4.811072626512642e-05, "loss": 0.2741, "step": 921, "task_loss": 0.12139546126127243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.729343883278363, "compression_loss": 0.0, "distillation_loss": 0.36805009841918945, "epoch": 0.88, "learning_rate": 4.810666074478121e-05, "loss": 0.3586, "step": 922, "task_loss": 0.2733895480632782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7293721306594471, "compression_loss": 0.0, "distillation_loss": 0.2743581533432007, "epoch": 0.88, "learning_rate": 4.8102591026999796e-05, "loss": 0.2591, "step": 923, "task_loss": 0.12169023603200912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7294003705109081, "compression_loss": 0.0, "distillation_loss": 0.12222736328840256, "epoch": 0.88, "learning_rate": 4.8098517112521456e-05, "loss": 0.1283, "step": 924, "task_loss": 0.18253932893276215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7294286028337497, "compression_loss": 0.0, "distillation_loss": 0.38871803879737854, "epoch": 0.88, "learning_rate": 4.8094439002086234e-05, "loss": 0.3813, "step": 925, "task_loss": 0.31461140513420105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7294568276289756, "compression_loss": 0.0, "distillation_loss": 0.07011576741933823, "epoch": 0.88, "learning_rate": 4.809035669643495e-05, "loss": 0.0841, "step": 926, "task_loss": 0.21029669046401978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7294850448975896, "compression_loss": 0.0, "distillation_loss": 0.2029143124818802, "epoch": 0.88, "learning_rate": 4.808627019630917e-05, "loss": 0.1999, "step": 927, "task_loss": 0.17250923812389374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7295132546405951, "compression_loss": 0.0, "distillation_loss": 0.4461289942264557, "epoch": 0.88, "learning_rate": 4.808217950245122e-05, "loss": 0.4298, "step": 928, "task_loss": 0.28285056352615356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7295414568589961, "compression_loss": 0.0, "distillation_loss": 0.2739975154399872, "epoch": 0.88, "learning_rate": 4.807808461560419e-05, "loss": 0.2818, "step": 929, "task_loss": 0.35219091176986694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.729569651553796, "compression_loss": 0.0, "distillation_loss": 0.14522334933280945, "epoch": 0.88, "learning_rate": 4.8073985536511956e-05, "loss": 0.1395, "step": 930, "task_loss": 0.08773127943277359 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7295978387259987, "compression_loss": 0.0, "distillation_loss": 0.1583382934331894, "epoch": 0.88, "learning_rate": 4.806988226591912e-05, "loss": 0.1488, "step": 931, "task_loss": 0.06294950842857361 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7296260183766078, "compression_loss": 0.0, "distillation_loss": 0.42467373609542847, "epoch": 0.89, "learning_rate": 4.806577480457106e-05, "loss": 0.4083, "step": 932, "task_loss": 0.26088839769363403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7296541905066269, "compression_loss": 0.0, "distillation_loss": 0.2598496377468109, "epoch": 0.89, "learning_rate": 4.8061663153213935e-05, "loss": 0.2517, "step": 933, "task_loss": 0.17828907072544098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7296823551170598, "compression_loss": 0.0, "distillation_loss": 0.26050207018852234, "epoch": 0.89, "learning_rate": 4.805754731259462e-05, "loss": 0.2466, "step": 934, "task_loss": 0.12179625779390335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7297105122089101, "compression_loss": 0.0, "distillation_loss": 0.2224908173084259, "epoch": 0.89, "learning_rate": 4.805342728346079e-05, "loss": 0.212, "step": 935, "task_loss": 0.11794877797365189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7297386617831816, "compression_loss": 0.0, "distillation_loss": 0.2159128487110138, "epoch": 0.89, "learning_rate": 4.804930306656087e-05, "loss": 0.2034, "step": 936, "task_loss": 0.090658038854599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7297668038408779, "compression_loss": 0.0, "distillation_loss": 0.42296963930130005, "epoch": 0.89, "learning_rate": 4.804517466264405e-05, "loss": 0.4042, "step": 937, "task_loss": 0.2355644851922989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7297949383830027, "compression_loss": 0.0, "distillation_loss": 0.1279992312192917, "epoch": 0.89, "learning_rate": 4.8041042072460244e-05, "loss": 0.1309, "step": 938, "task_loss": 0.15678860247135162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7298230654105596, "compression_loss": 0.0, "distillation_loss": 0.4838896095752716, "epoch": 0.89, "learning_rate": 4.803690529676019e-05, "loss": 0.4677, "step": 939, "task_loss": 0.3221690356731415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7298511849245524, "compression_loss": 0.0, "distillation_loss": 0.3270692229270935, "epoch": 0.89, "learning_rate": 4.803276433629534e-05, "loss": 0.3116, "step": 940, "task_loss": 0.17188920080661774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7298792969259846, "compression_loss": 0.0, "distillation_loss": 0.1887792944908142, "epoch": 0.89, "learning_rate": 4.802861919181793e-05, "loss": 0.1795, "step": 941, "task_loss": 0.09571507573127747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7299074014158602, "compression_loss": 0.0, "distillation_loss": 0.33935946226119995, "epoch": 0.89, "learning_rate": 4.802446986408093e-05, "loss": 0.3392, "step": 942, "task_loss": 0.33791565895080566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7299354983951827, "compression_loss": 0.0, "distillation_loss": 0.33818504214286804, "epoch": 0.9, "learning_rate": 4.8020316353838095e-05, "loss": 0.3372, "step": 943, "task_loss": 0.3283797800540924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7299635878649556, "compression_loss": 0.0, "distillation_loss": 0.10457701981067657, "epoch": 0.9, "learning_rate": 4.8016158661843926e-05, "loss": 0.1072, "step": 944, "task_loss": 0.13106907904148102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7299916698261829, "compression_loss": 0.0, "distillation_loss": 0.1351221650838852, "epoch": 0.9, "learning_rate": 4.8011996788853686e-05, "loss": 0.1263, "step": 945, "task_loss": 0.04674810171127319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7300197442798682, "compression_loss": 0.0, "distillation_loss": 0.3631080389022827, "epoch": 0.9, "learning_rate": 4.80078307356234e-05, "loss": 0.3498, "step": 946, "task_loss": 0.2304990440607071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.730047811227015, "compression_loss": 0.0, "distillation_loss": 0.42994534969329834, "epoch": 0.9, "learning_rate": 4.800366050290986e-05, "loss": 0.4158, "step": 947, "task_loss": 0.28830981254577637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7300758706686271, "compression_loss": 0.0, "distillation_loss": 0.3227723240852356, "epoch": 0.9, "learning_rate": 4.799948609147061e-05, "loss": 0.3099, "step": 948, "task_loss": 0.19404058158397675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7301039226057082, "compression_loss": 0.0, "distillation_loss": 0.20503823459148407, "epoch": 0.9, "learning_rate": 4.7995307502063936e-05, "loss": 0.1954, "step": 949, "task_loss": 0.10874759405851364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7301319670392621, "compression_loss": 0.0, "distillation_loss": 0.3428541421890259, "epoch": 0.9, "learning_rate": 4.799112473544891e-05, "loss": 0.3248, "step": 950, "task_loss": 0.16210930049419403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7301600039702922, "compression_loss": 0.0, "distillation_loss": 0.31048500537872314, "epoch": 0.9, "learning_rate": 4.7986937792385344e-05, "loss": 0.2987, "step": 951, "task_loss": 0.19261281192302704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7301880333998025, "compression_loss": 0.0, "distillation_loss": 0.33201491832733154, "epoch": 0.9, "learning_rate": 4.798274667363383e-05, "loss": 0.3181, "step": 952, "task_loss": 0.19296838343143463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7302160553287964, "compression_loss": 0.0, "distillation_loss": 0.4270634949207306, "epoch": 0.91, "learning_rate": 4.7978551379955684e-05, "loss": 0.4057, "step": 953, "task_loss": 0.21361877024173737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7302440697582777, "compression_loss": 0.0, "distillation_loss": 0.12916311621665955, "epoch": 0.91, "learning_rate": 4.797435191211302e-05, "loss": 0.1248, "step": 954, "task_loss": 0.08600229024887085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7302720766892501, "compression_loss": 0.0, "distillation_loss": 0.17904284596443176, "epoch": 0.91, "learning_rate": 4.797014827086869e-05, "loss": 0.1746, "step": 955, "task_loss": 0.13418468832969666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7303000761227173, "compression_loss": 0.0, "distillation_loss": 0.12082010507583618, "epoch": 0.91, "learning_rate": 4.79659404569863e-05, "loss": 0.1303, "step": 956, "task_loss": 0.21565213799476624 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7303280680596829, "compression_loss": 0.0, "distillation_loss": 0.3740869164466858, "epoch": 0.91, "learning_rate": 4.7961728471230214e-05, "loss": 0.3668, "step": 957, "task_loss": 0.30169612169265747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7303560525011507, "compression_loss": 0.0, "distillation_loss": 0.2538146674633026, "epoch": 0.91, "learning_rate": 4.7957512314365574e-05, "loss": 0.2491, "step": 958, "task_loss": 0.20712712407112122 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7303840294481242, "compression_loss": 0.0, "distillation_loss": 0.3629278540611267, "epoch": 0.91, "learning_rate": 4.7953291987158254e-05, "loss": 0.3478, "step": 959, "task_loss": 0.21124333143234253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7304119989016074, "compression_loss": 0.0, "distillation_loss": 0.2513018846511841, "epoch": 0.91, "learning_rate": 4.79490674903749e-05, "loss": 0.2443, "step": 960, "task_loss": 0.18155395984649658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7304399608626035, "compression_loss": 0.0, "distillation_loss": 0.14075066149234772, "epoch": 0.91, "learning_rate": 4.7944838824782916e-05, "loss": 0.1426, "step": 961, "task_loss": 0.15932926535606384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7304679153321166, "compression_loss": 0.0, "distillation_loss": 0.25464701652526855, "epoch": 0.91, "learning_rate": 4.794060599115045e-05, "loss": 0.2359, "step": 962, "task_loss": 0.06738609075546265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7304958623111502, "compression_loss": 0.0, "distillation_loss": 0.12189721316099167, "epoch": 0.91, "learning_rate": 4.793636899024643e-05, "loss": 0.1161, "step": 963, "task_loss": 0.06351499259471893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7305238018007081, "compression_loss": 0.0, "distillation_loss": 0.13325002789497375, "epoch": 0.92, "learning_rate": 4.7932127822840516e-05, "loss": 0.1313, "step": 964, "task_loss": 0.11407996714115143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7305517338017938, "compression_loss": 0.0, "distillation_loss": 0.13893647491931915, "epoch": 0.92, "learning_rate": 4.792788248970314e-05, "loss": 0.1442, "step": 965, "task_loss": 0.1910799741744995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7305796583154112, "compression_loss": 0.0, "distillation_loss": 0.49337151646614075, "epoch": 0.92, "learning_rate": 4.79236329916055e-05, "loss": 0.4772, "step": 966, "task_loss": 0.3313041031360626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7306075753425638, "compression_loss": 0.0, "distillation_loss": 0.1377095878124237, "epoch": 0.92, "learning_rate": 4.79193793293195e-05, "loss": 0.1368, "step": 967, "task_loss": 0.12868079543113708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7306354848842553, "compression_loss": 0.0, "distillation_loss": 0.5649617910385132, "epoch": 0.92, "learning_rate": 4.791512150361788e-05, "loss": 0.5411, "step": 968, "task_loss": 0.3263978660106659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7306633869414896, "compression_loss": 0.0, "distillation_loss": 0.19388450682163239, "epoch": 0.92, "learning_rate": 4.791085951527408e-05, "loss": 0.1838, "step": 969, "task_loss": 0.09344847500324249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.73069128151527, "compression_loss": 0.0, "distillation_loss": 0.11812691390514374, "epoch": 0.92, "learning_rate": 4.7906593365062304e-05, "loss": 0.1124, "step": 970, "task_loss": 0.06074811518192291 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7307191686066006, "compression_loss": 0.0, "distillation_loss": 0.2905130386352539, "epoch": 0.92, "learning_rate": 4.790232305375752e-05, "loss": 0.2802, "step": 971, "task_loss": 0.18719345331192017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7307470482164847, "compression_loss": 0.0, "distillation_loss": 0.41280168294906616, "epoch": 0.92, "learning_rate": 4.789804858213547e-05, "loss": 0.3943, "step": 972, "task_loss": 0.2281726449728012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7307749203459263, "compression_loss": 0.0, "distillation_loss": 0.5764849185943604, "epoch": 0.92, "learning_rate": 4.7893769950972605e-05, "loss": 0.5575, "step": 973, "task_loss": 0.3868526816368103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7308027849959289, "compression_loss": 0.0, "distillation_loss": 0.3850351572036743, "epoch": 0.92, "learning_rate": 4.788948716104618e-05, "loss": 0.3583, "step": 974, "task_loss": 0.1175268292427063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7308306421674962, "compression_loss": 0.0, "distillation_loss": 0.22594526410102844, "epoch": 0.93, "learning_rate": 4.7885200213134164e-05, "loss": 0.2176, "step": 975, "task_loss": 0.1424587517976761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7308584918616319, "compression_loss": 0.0, "distillation_loss": 0.2664077579975128, "epoch": 0.93, "learning_rate": 4.788090910801532e-05, "loss": 0.2586, "step": 976, "task_loss": 0.18784651160240173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7308863340793397, "compression_loss": 0.0, "distillation_loss": 0.24012893438339233, "epoch": 0.93, "learning_rate": 4.787661384646913e-05, "loss": 0.2305, "step": 977, "task_loss": 0.14341062307357788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7309141688216232, "compression_loss": 0.0, "distillation_loss": 0.19192594289779663, "epoch": 0.93, "learning_rate": 4.787231442927587e-05, "loss": 0.1862, "step": 978, "task_loss": 0.13485443592071533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7309419960894863, "compression_loss": 0.0, "distillation_loss": 0.2409208118915558, "epoch": 0.93, "learning_rate": 4.786801085721654e-05, "loss": 0.2312, "step": 979, "task_loss": 0.14332106709480286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7309698158839324, "compression_loss": 0.0, "distillation_loss": 0.5161716938018799, "epoch": 0.93, "learning_rate": 4.78637031310729e-05, "loss": 0.4927, "step": 980, "task_loss": 0.28135231137275696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7309976282059654, "compression_loss": 0.0, "distillation_loss": 0.3326793909072876, "epoch": 0.93, "learning_rate": 4.7859391251627474e-05, "loss": 0.3267, "step": 981, "task_loss": 0.2727680802345276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7310254330565888, "compression_loss": 0.0, "distillation_loss": 0.1554919183254242, "epoch": 0.93, "learning_rate": 4.7855075219663535e-05, "loss": 0.148, "step": 982, "task_loss": 0.08066565543413162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7310532304368065, "compression_loss": 0.0, "distillation_loss": 0.34702181816101074, "epoch": 0.93, "learning_rate": 4.785075503596511e-05, "loss": 0.3353, "step": 983, "task_loss": 0.2300397753715515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.731081020347622, "compression_loss": 0.0, "distillation_loss": 0.07944006472826004, "epoch": 0.93, "learning_rate": 4.7846430701316994e-05, "loss": 0.0734, "step": 984, "task_loss": 0.018551897257566452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7311088027900391, "compression_loss": 0.0, "distillation_loss": 0.35553303360939026, "epoch": 0.94, "learning_rate": 4.78421022165047e-05, "loss": 0.3459, "step": 985, "task_loss": 0.25915029644966125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7311365777650615, "compression_loss": 0.0, "distillation_loss": 0.2604832351207733, "epoch": 0.94, "learning_rate": 4.783776958231453e-05, "loss": 0.2522, "step": 986, "task_loss": 0.1778934746980667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7311643452736927, "compression_loss": 0.0, "distillation_loss": 0.4339986741542816, "epoch": 0.94, "learning_rate": 4.783343279953353e-05, "loss": 0.4111, "step": 987, "task_loss": 0.205443874001503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7311921053169365, "compression_loss": 0.0, "distillation_loss": 0.09336867183446884, "epoch": 0.94, "learning_rate": 4.782909186894949e-05, "loss": 0.0875, "step": 988, "task_loss": 0.03483529016375542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7312198578957965, "compression_loss": 0.0, "distillation_loss": 0.3490631878376007, "epoch": 0.94, "learning_rate": 4.782474679135097e-05, "loss": 0.3363, "step": 989, "task_loss": 0.22176185250282288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7312476030112766, "compression_loss": 0.0, "distillation_loss": 0.31977570056915283, "epoch": 0.94, "learning_rate": 4.782039756752727e-05, "loss": 0.3017, "step": 990, "task_loss": 0.13884237408638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7312753406643802, "compression_loss": 0.0, "distillation_loss": 0.2667671740055084, "epoch": 0.94, "learning_rate": 4.781604419826845e-05, "loss": 0.2698, "step": 991, "task_loss": 0.2971741855144501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7313030708561112, "compression_loss": 0.0, "distillation_loss": 0.10828962922096252, "epoch": 0.94, "learning_rate": 4.781168668436532e-05, "loss": 0.1087, "step": 992, "task_loss": 0.11210381239652634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7313307935874732, "compression_loss": 0.0, "distillation_loss": 0.3468828499317169, "epoch": 0.94, "learning_rate": 4.780732502660943e-05, "loss": 0.3356, "step": 993, "task_loss": 0.2339598685503006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7313585088594698, "compression_loss": 0.0, "distillation_loss": 0.14131498336791992, "epoch": 0.94, "learning_rate": 4.780295922579312e-05, "loss": 0.1407, "step": 994, "task_loss": 0.1348223239183426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7313862166731049, "compression_loss": 0.0, "distillation_loss": 0.2651304304599762, "epoch": 0.94, "learning_rate": 4.779858928270944e-05, "loss": 0.253, "step": 995, "task_loss": 0.1437424123287201 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.731413917029382, "compression_loss": 0.0, "distillation_loss": 0.11093267053365707, "epoch": 0.95, "learning_rate": 4.7794215198152216e-05, "loss": 0.1031, "step": 996, "task_loss": 0.03270239382982254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7314416099293048, "compression_loss": 0.0, "distillation_loss": 0.2656594514846802, "epoch": 0.95, "learning_rate": 4.778983697291603e-05, "loss": 0.2697, "step": 997, "task_loss": 0.3064509630203247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7314692953738771, "compression_loss": 0.0, "distillation_loss": 0.29231274127960205, "epoch": 0.95, "learning_rate": 4.7785454607796195e-05, "loss": 0.2753, "step": 998, "task_loss": 0.12249677628278732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7314969733641024, "compression_loss": 0.0, "distillation_loss": 0.15542489290237427, "epoch": 0.95, "learning_rate": 4.77810681035888e-05, "loss": 0.154, "step": 999, "task_loss": 0.14133189618587494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7315246439009845, "compression_loss": 0.0, "distillation_loss": 0.27466249465942383, "epoch": 0.95, "learning_rate": 4.777667746109067e-05, "loss": 0.2681, "step": 1000, "task_loss": 0.20898157358169556 }, { "epoch": 0.95, "eval_accuracy": 0.9094036697247706, "eval_loss": 0.3636666536331177, "eval_runtime": 17.8028, "eval_samples_per_second": 48.981, "eval_steps_per_second": 6.123, "step": 1000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7315523069855271, "compression_loss": 0.0, "distillation_loss": 0.10684286057949066, "epoch": 0.95, "learning_rate": 4.7772282681099377e-05, "loss": 0.1, "step": 1001, "task_loss": 0.038139708340168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7315799626187338, "compression_loss": 0.0, "distillation_loss": 0.33934301137924194, "epoch": 0.95, "learning_rate": 4.7767883764413266e-05, "loss": 0.3332, "step": 1002, "task_loss": 0.27778148651123047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7316076108016083, "compression_loss": 0.0, "distillation_loss": 0.2035553753376007, "epoch": 0.95, "learning_rate": 4.776348071183142e-05, "loss": 0.1945, "step": 1003, "task_loss": 0.11305206269025803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7316352515351543, "compression_loss": 0.0, "distillation_loss": 0.1286626160144806, "epoch": 0.95, "learning_rate": 4.775907352415367e-05, "loss": 0.1426, "step": 1004, "task_loss": 0.26838886737823486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7316628848203756, "compression_loss": 0.0, "distillation_loss": 0.4252236485481262, "epoch": 0.95, "learning_rate": 4.7754662202180606e-05, "loss": 0.4083, "step": 1005, "task_loss": 0.2558348774909973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7316905106582756, "compression_loss": 0.0, "distillation_loss": 0.24486252665519714, "epoch": 0.96, "learning_rate": 4.7750246746713565e-05, "loss": 0.2329, "step": 1006, "task_loss": 0.12567219138145447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7317181290498582, "compression_loss": 0.0, "distillation_loss": 0.30694228410720825, "epoch": 0.96, "learning_rate": 4.7745827158554634e-05, "loss": 0.2926, "step": 1007, "task_loss": 0.1638035923242569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7317457399961271, "compression_loss": 0.0, "distillation_loss": 0.22996553778648376, "epoch": 0.96, "learning_rate": 4.774140343850666e-05, "loss": 0.227, "step": 1008, "task_loss": 0.19992922246456146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.731773343498086, "compression_loss": 0.0, "distillation_loss": 0.17360806465148926, "epoch": 0.96, "learning_rate": 4.773697558737322e-05, "loss": 0.1746, "step": 1009, "task_loss": 0.1831490695476532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7318009395567384, "compression_loss": 0.0, "distillation_loss": 0.08699658513069153, "epoch": 0.96, "learning_rate": 4.773254360595867e-05, "loss": 0.082, "step": 1010, "task_loss": 0.03714621439576149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7318285281730881, "compression_loss": 0.0, "distillation_loss": 0.12660613656044006, "epoch": 0.96, "learning_rate": 4.77281074950681e-05, "loss": 0.1187, "step": 1011, "task_loss": 0.04724828153848648 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7318561093481386, "compression_loss": 0.0, "distillation_loss": 0.1175093725323677, "epoch": 0.96, "learning_rate": 4.7723667255507334e-05, "loss": 0.1299, "step": 1012, "task_loss": 0.24159134924411774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.731883683082894, "compression_loss": 0.0, "distillation_loss": 0.2919718027114868, "epoch": 0.96, "learning_rate": 4.771922288808297e-05, "loss": 0.2807, "step": 1013, "task_loss": 0.17899443209171295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7319112493783576, "compression_loss": 0.0, "distillation_loss": 0.13135388493537903, "epoch": 0.96, "learning_rate": 4.771477439360235e-05, "loss": 0.1246, "step": 1014, "task_loss": 0.06415353715419769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7319388082355331, "compression_loss": 0.0, "distillation_loss": 0.35100582242012024, "epoch": 0.96, "learning_rate": 4.7710321772873566e-05, "loss": 0.348, "step": 1015, "task_loss": 0.3205385208129883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7319663596554244, "compression_loss": 0.0, "distillation_loss": 0.2460920214653015, "epoch": 0.96, "learning_rate": 4.770586502670546e-05, "loss": 0.2355, "step": 1016, "task_loss": 0.13989022374153137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7319939036390352, "compression_loss": 0.0, "distillation_loss": 0.23754239082336426, "epoch": 0.97, "learning_rate": 4.770140415590762e-05, "loss": 0.2337, "step": 1017, "task_loss": 0.19897620379924774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7320214401873689, "compression_loss": 0.0, "distillation_loss": 0.46301230788230896, "epoch": 0.97, "learning_rate": 4.769693916129039e-05, "loss": 0.4469, "step": 1018, "task_loss": 0.30206912755966187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7320489693014294, "compression_loss": 0.0, "distillation_loss": 0.41447192430496216, "epoch": 0.97, "learning_rate": 4.769247004366485e-05, "loss": 0.3952, "step": 1019, "task_loss": 0.22126147150993347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7320764909822203, "compression_loss": 0.0, "distillation_loss": 0.19669479131698608, "epoch": 0.97, "learning_rate": 4.768799680384283e-05, "loss": 0.1961, "step": 1020, "task_loss": 0.19112606346607208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7321040052307453, "compression_loss": 0.0, "distillation_loss": 0.1391059011220932, "epoch": 0.97, "learning_rate": 4.768351944263693e-05, "loss": 0.1306, "step": 1021, "task_loss": 0.05390199273824692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7321315120480081, "compression_loss": 0.0, "distillation_loss": 0.1612778604030609, "epoch": 0.97, "learning_rate": 4.767903796086048e-05, "loss": 0.1569, "step": 1022, "task_loss": 0.11706437915563583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7321590114350123, "compression_loss": 0.0, "distillation_loss": 0.13082058727741241, "epoch": 0.97, "learning_rate": 4.767455235932756e-05, "loss": 0.1235, "step": 1023, "task_loss": 0.05800522491335869 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7321865033927617, "compression_loss": 0.0, "distillation_loss": 0.07101086527109146, "epoch": 0.97, "learning_rate": 4.7670062638853e-05, "loss": 0.0663, "step": 1024, "task_loss": 0.023550687357783318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7322139879222599, "compression_loss": 0.0, "distillation_loss": 0.7248010635375977, "epoch": 0.97, "learning_rate": 4.766556880025238e-05, "loss": 0.6946, "step": 1025, "task_loss": 0.4223037362098694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7322414650245106, "compression_loss": 0.0, "distillation_loss": 0.18530097603797913, "epoch": 0.97, "learning_rate": 4.7661070844342033e-05, "loss": 0.1791, "step": 1026, "task_loss": 0.12346015125513077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7322689347005176, "compression_loss": 0.0, "distillation_loss": 0.436127245426178, "epoch": 0.98, "learning_rate": 4.7656568771939024e-05, "loss": 0.4136, "step": 1027, "task_loss": 0.21047565340995789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7322963969512843, "compression_loss": 0.0, "distillation_loss": 0.33645620942115784, "epoch": 0.98, "learning_rate": 4.765206258386119e-05, "loss": 0.3199, "step": 1028, "task_loss": 0.17079196870326996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7323238517778147, "compression_loss": 0.0, "distillation_loss": 0.2837325930595398, "epoch": 0.98, "learning_rate": 4.7647552280927086e-05, "loss": 0.2713, "step": 1029, "task_loss": 0.15971429646015167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7323512991811123, "compression_loss": 0.0, "distillation_loss": 0.2346368283033371, "epoch": 0.98, "learning_rate": 4.764303786395604e-05, "loss": 0.2218, "step": 1030, "task_loss": 0.10661163181066513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7323787391621808, "compression_loss": 0.0, "distillation_loss": 0.26597869396209717, "epoch": 0.98, "learning_rate": 4.763851933376812e-05, "loss": 0.2678, "step": 1031, "task_loss": 0.284251868724823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7324061717220239, "compression_loss": 0.0, "distillation_loss": 0.21545451879501343, "epoch": 0.98, "learning_rate": 4.763399669118414e-05, "loss": 0.2012, "step": 1032, "task_loss": 0.07268868386745453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7324335968616453, "compression_loss": 0.0, "distillation_loss": 0.3080711364746094, "epoch": 0.98, "learning_rate": 4.762946993702565e-05, "loss": 0.2918, "step": 1033, "task_loss": 0.14552035927772522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7324610145820487, "compression_loss": 0.0, "distillation_loss": 0.2603287100791931, "epoch": 0.98, "learning_rate": 4.7624939072114954e-05, "loss": 0.2473, "step": 1034, "task_loss": 0.12976713478565216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7324884248842377, "compression_loss": 0.0, "distillation_loss": 0.28742021322250366, "epoch": 0.98, "learning_rate": 4.762040409727512e-05, "loss": 0.2733, "step": 1035, "task_loss": 0.14594219624996185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7325158277692161, "compression_loss": 0.0, "distillation_loss": 0.17579105496406555, "epoch": 0.98, "learning_rate": 4.761586501332994e-05, "loss": 0.1813, "step": 1036, "task_loss": 0.23123717308044434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7325432232379875, "compression_loss": 0.0, "distillation_loss": 0.1056813895702362, "epoch": 0.98, "learning_rate": 4.7611321821103954e-05, "loss": 0.1001, "step": 1037, "task_loss": 0.04944942146539688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7325706112915555, "compression_loss": 0.0, "distillation_loss": 0.10237106680870056, "epoch": 0.99, "learning_rate": 4.760677452142247e-05, "loss": 0.098, "step": 1038, "task_loss": 0.05824420601129532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.732597991930924, "compression_loss": 0.0, "distillation_loss": 0.28087666630744934, "epoch": 0.99, "learning_rate": 4.760222311511152e-05, "loss": 0.273, "step": 1039, "task_loss": 0.20193740725517273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7326253651570965, "compression_loss": 0.0, "distillation_loss": 0.31947529315948486, "epoch": 0.99, "learning_rate": 4.759766760299788e-05, "loss": 0.316, "step": 1040, "task_loss": 0.2846387028694153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7326527309710767, "compression_loss": 0.0, "distillation_loss": 0.08732345700263977, "epoch": 0.99, "learning_rate": 4.759310798590909e-05, "loss": 0.0912, "step": 1041, "task_loss": 0.12588536739349365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7326800893738684, "compression_loss": 0.0, "distillation_loss": 0.31434834003448486, "epoch": 0.99, "learning_rate": 4.758854426467343e-05, "loss": 0.3049, "step": 1042, "task_loss": 0.22009915113449097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7327074403664752, "compression_loss": 0.0, "distillation_loss": 0.3221967816352844, "epoch": 0.99, "learning_rate": 4.758397644011992e-05, "loss": 0.3095, "step": 1043, "task_loss": 0.19476738572120667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7327347839499008, "compression_loss": 0.0, "distillation_loss": 0.2775757908821106, "epoch": 0.99, "learning_rate": 4.757940451307831e-05, "loss": 0.2682, "step": 1044, "task_loss": 0.18372797966003418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7327621201251489, "compression_loss": 0.0, "distillation_loss": 0.25040361285209656, "epoch": 0.99, "learning_rate": 4.757482848437914e-05, "loss": 0.2413, "step": 1045, "task_loss": 0.15979930758476257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.732789448893223, "compression_loss": 0.0, "distillation_loss": 0.16249604523181915, "epoch": 0.99, "learning_rate": 4.7570248354853644e-05, "loss": 0.1538, "step": 1046, "task_loss": 0.07583259046077728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.732816770255127, "compression_loss": 0.0, "distillation_loss": 0.09381724894046783, "epoch": 0.99, "learning_rate": 4.7565664125333845e-05, "loss": 0.0878, "step": 1047, "task_loss": 0.033302441239356995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7328440842118646, "compression_loss": 0.0, "distillation_loss": 0.2704293131828308, "epoch": 1.0, "learning_rate": 4.7561075796652464e-05, "loss": 0.2806, "step": 1048, "task_loss": 0.3726223111152649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7328713907644394, "compression_loss": 0.0, "distillation_loss": 0.2771369516849518, "epoch": 1.0, "learning_rate": 4.755648336964302e-05, "loss": 0.2641, "step": 1049, "task_loss": 0.14655812084674835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.732898689913855, "compression_loss": 0.0, "distillation_loss": 0.11366402357816696, "epoch": 1.0, "learning_rate": 4.7551886845139743e-05, "loss": 0.1051, "step": 1050, "task_loss": 0.028415286913514137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7329259816611152, "compression_loss": 0.0, "distillation_loss": 0.152854323387146, "epoch": 1.0, "learning_rate": 4.754728622397761e-05, "loss": 0.1433, "step": 1051, "task_loss": 0.05741133540868759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.7329532660072237, "compression_loss": 0.0, "distillation_loss": 0.18268108367919922, "epoch": 1.0, "learning_rate": 4.754268150699234e-05, "loss": 0.1803, "step": 1052, "task_loss": 0.15914951264858246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5468226808906795, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7000000210462276, "compression/magnitude_sparsity/target_sparsity_level": 0.732980542953184, "compression_loss": 0.0, "distillation_loss": 0.04385395348072052, "epoch": 1.0, "learning_rate": 4.753807269502041e-05, "loss": 0.0402, "step": 1053, "task_loss": 0.007449280470609665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7330078125, "compression_loss": 0.0, "distillation_loss": 0.29212307929992676, "epoch": 1.0, "learning_rate": 4.7533459788899026e-05, "loss": 0.277, "step": 1054, "task_loss": 0.1408318132162094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7330350746486752, "compression_loss": 0.0, "distillation_loss": 0.34965944290161133, "epoch": 1.0, "learning_rate": 4.752884278946614e-05, "loss": 0.3271, "step": 1055, "task_loss": 0.12405388802289963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7330623294002135, "compression_loss": 0.0, "distillation_loss": 0.46271616220474243, "epoch": 1.0, "learning_rate": 4.752422169756048e-05, "loss": 0.4489, "step": 1056, "task_loss": 0.3246670067310333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7330895767556185, "compression_loss": 0.0, "distillation_loss": 0.26765865087509155, "epoch": 1.0, "learning_rate": 4.7519596514021464e-05, "loss": 0.2574, "step": 1057, "task_loss": 0.16510917246341705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7331168167158937, "compression_loss": 0.0, "distillation_loss": 0.7084404230117798, "epoch": 1.0, "learning_rate": 4.751496723968929e-05, "loss": 0.6848, "step": 1058, "task_loss": 0.4719713628292084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7331440492820429, "compression_loss": 0.0, "distillation_loss": 0.19782431423664093, "epoch": 1.01, "learning_rate": 4.751033387540488e-05, "loss": 0.1849, "step": 1059, "task_loss": 0.06854899227619171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7331712744550699, "compression_loss": 0.0, "distillation_loss": 0.44147199392318726, "epoch": 1.01, "learning_rate": 4.7505696422009904e-05, "loss": 0.4319, "step": 1060, "task_loss": 0.34560465812683105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7331984922359782, "compression_loss": 0.0, "distillation_loss": 0.5233361124992371, "epoch": 1.01, "learning_rate": 4.750105488034679e-05, "loss": 0.4983, "step": 1061, "task_loss": 0.27257034182548523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7332257026257717, "compression_loss": 0.0, "distillation_loss": 0.5197453498840332, "epoch": 1.01, "learning_rate": 4.749640925125869e-05, "loss": 0.5011, "step": 1062, "task_loss": 0.3330010771751404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7332529056254539, "compression_loss": 0.0, "distillation_loss": 0.17582668364048004, "epoch": 1.01, "learning_rate": 4.749175953558951e-05, "loss": 0.1842, "step": 1063, "task_loss": 0.2600041329860687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7332801012360284, "compression_loss": 0.0, "distillation_loss": 0.4421820640563965, "epoch": 1.01, "learning_rate": 4.748710573418388e-05, "loss": 0.4457, "step": 1064, "task_loss": 0.47691667079925537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7333072894584991, "compression_loss": 0.0, "distillation_loss": 0.18625092506408691, "epoch": 1.01, "learning_rate": 4.7482447847887204e-05, "loss": 0.1779, "step": 1065, "task_loss": 0.1030142530798912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7333344702938697, "compression_loss": 0.0, "distillation_loss": 0.19206887483596802, "epoch": 1.01, "learning_rate": 4.747778587754559e-05, "loss": 0.1904, "step": 1066, "task_loss": 0.1756243109703064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7333616437431436, "compression_loss": 0.0, "distillation_loss": 0.28138798475265503, "epoch": 1.01, "learning_rate": 4.7473119824005926e-05, "loss": 0.2826, "step": 1067, "task_loss": 0.29329320788383484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7333888098073248, "compression_loss": 0.0, "distillation_loss": 0.14245599508285522, "epoch": 1.01, "learning_rate": 4.7468449688115806e-05, "loss": 0.1334, "step": 1068, "task_loss": 0.05212767794728279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7334159684874167, "compression_loss": 0.0, "distillation_loss": 0.517392635345459, "epoch": 1.02, "learning_rate": 4.74637754707236e-05, "loss": 0.5164, "step": 1069, "task_loss": 0.5071411728858948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7334431197844233, "compression_loss": 0.0, "distillation_loss": 0.3079312741756439, "epoch": 1.02, "learning_rate": 4.7459097172678386e-05, "loss": 0.3028, "step": 1070, "task_loss": 0.2564443051815033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.733470263699348, "compression_loss": 0.0, "distillation_loss": 0.07088293135166168, "epoch": 1.02, "learning_rate": 4.745441479483001e-05, "loss": 0.0657, "step": 1071, "task_loss": 0.019033435732126236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7334974002331945, "compression_loss": 0.0, "distillation_loss": 0.1880199909210205, "epoch": 1.02, "learning_rate": 4.744972833802904e-05, "loss": 0.1756, "step": 1072, "task_loss": 0.06384404003620148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7335245293869668, "compression_loss": 0.0, "distillation_loss": 0.36879584193229675, "epoch": 1.02, "learning_rate": 4.74450378031268e-05, "loss": 0.3632, "step": 1073, "task_loss": 0.3126252293586731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7335516511616682, "compression_loss": 0.0, "distillation_loss": 0.33385515213012695, "epoch": 1.02, "learning_rate": 4.744034319097535e-05, "loss": 0.3191, "step": 1074, "task_loss": 0.18633373081684113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7335787655583026, "compression_loss": 0.0, "distillation_loss": 0.3007218539714813, "epoch": 1.02, "learning_rate": 4.743564450242749e-05, "loss": 0.2902, "step": 1075, "task_loss": 0.19521453976631165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7336058725778736, "compression_loss": 0.0, "distillation_loss": 0.2639384865760803, "epoch": 1.02, "learning_rate": 4.7430941738336745e-05, "loss": 0.2496, "step": 1076, "task_loss": 0.120790995657444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7336329722213848, "compression_loss": 0.0, "distillation_loss": 0.3540310859680176, "epoch": 1.02, "learning_rate": 4.742623489955741e-05, "loss": 0.3531, "step": 1077, "task_loss": 0.34429335594177246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7336600644898401, "compression_loss": 0.0, "distillation_loss": 0.3709731698036194, "epoch": 1.02, "learning_rate": 4.74215239869445e-05, "loss": 0.3647, "step": 1078, "task_loss": 0.30800601840019226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.733687149384243, "compression_loss": 0.0, "distillation_loss": 0.22464478015899658, "epoch": 1.02, "learning_rate": 4.741680900135377e-05, "loss": 0.2118, "step": 1079, "task_loss": 0.09570850431919098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7337142269055972, "compression_loss": 0.0, "distillation_loss": 0.4049076437950134, "epoch": 1.03, "learning_rate": 4.741208994364173e-05, "loss": 0.3865, "step": 1080, "task_loss": 0.2211461365222931 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7337412970549065, "compression_loss": 0.0, "distillation_loss": 0.1285635232925415, "epoch": 1.03, "learning_rate": 4.740736681466561e-05, "loss": 0.1211, "step": 1081, "task_loss": 0.0543033629655838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7337683598331745, "compression_loss": 0.0, "distillation_loss": 0.15440712869167328, "epoch": 1.03, "learning_rate": 4.74026396152834e-05, "loss": 0.1622, "step": 1082, "task_loss": 0.2323606014251709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7337954152414049, "compression_loss": 0.0, "distillation_loss": 0.06974273920059204, "epoch": 1.03, "learning_rate": 4.7397908346353796e-05, "loss": 0.0666, "step": 1083, "task_loss": 0.038624271750450134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7338224632806014, "compression_loss": 0.0, "distillation_loss": 0.41569557785987854, "epoch": 1.03, "learning_rate": 4.739317300873628e-05, "loss": 0.3924, "step": 1084, "task_loss": 0.1831234097480774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7338495039517676, "compression_loss": 0.0, "distillation_loss": 0.3054138720035553, "epoch": 1.03, "learning_rate": 4.738843360329104e-05, "loss": 0.2951, "step": 1085, "task_loss": 0.20198288559913635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7338765372559072, "compression_loss": 0.0, "distillation_loss": 0.26133251190185547, "epoch": 1.03, "learning_rate": 4.738369013087902e-05, "loss": 0.2525, "step": 1086, "task_loss": 0.17272129654884338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7339035631940241, "compression_loss": 0.0, "distillation_loss": 0.20728163421154022, "epoch": 1.03, "learning_rate": 4.7378942592361876e-05, "loss": 0.1958, "step": 1087, "task_loss": 0.09262159466743469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7339305817671216, "compression_loss": 0.0, "distillation_loss": 0.11674150079488754, "epoch": 1.03, "learning_rate": 4.737419098860204e-05, "loss": 0.108, "step": 1088, "task_loss": 0.029347870498895645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7339575929762037, "compression_loss": 0.0, "distillation_loss": 0.3030095100402832, "epoch": 1.03, "learning_rate": 4.7369435320462654e-05, "loss": 0.2916, "step": 1089, "task_loss": 0.1885221302509308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7339845968222738, "compression_loss": 0.0, "distillation_loss": 0.05839750915765762, "epoch": 1.04, "learning_rate": 4.73646755888076e-05, "loss": 0.054, "step": 1090, "task_loss": 0.014137573540210724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7340115933063359, "compression_loss": 0.0, "distillation_loss": 0.1552124172449112, "epoch": 1.04, "learning_rate": 4.7359911794501526e-05, "loss": 0.1474, "step": 1091, "task_loss": 0.07671723514795303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7340385824293935, "compression_loss": 0.0, "distillation_loss": 0.273028165102005, "epoch": 1.04, "learning_rate": 4.7355143938409785e-05, "loss": 0.2641, "step": 1092, "task_loss": 0.18373528122901917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7340655641924503, "compression_loss": 0.0, "distillation_loss": 0.2551477551460266, "epoch": 1.04, "learning_rate": 4.735037202139849e-05, "loss": 0.2586, "step": 1093, "task_loss": 0.28983938694000244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.73409253859651, "compression_loss": 0.0, "distillation_loss": 0.15983590483665466, "epoch": 1.04, "learning_rate": 4.734559604433447e-05, "loss": 0.1529, "step": 1094, "task_loss": 0.09036150574684143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7341195056425763, "compression_loss": 0.0, "distillation_loss": 0.13693635165691376, "epoch": 1.04, "learning_rate": 4.734081600808531e-05, "loss": 0.1312, "step": 1095, "task_loss": 0.07925444841384888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7341464653316528, "compression_loss": 0.0, "distillation_loss": 0.17353419959545135, "epoch": 1.04, "learning_rate": 4.733603191351933e-05, "loss": 0.1626, "step": 1096, "task_loss": 0.06457867473363876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7341734176647433, "compression_loss": 0.0, "distillation_loss": 0.15793973207473755, "epoch": 1.04, "learning_rate": 4.733124376150558e-05, "loss": 0.1539, "step": 1097, "task_loss": 0.11756959557533264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7342003626428515, "compression_loss": 0.0, "distillation_loss": 0.08797385543584824, "epoch": 1.04, "learning_rate": 4.7326451552913856e-05, "loss": 0.0843, "step": 1098, "task_loss": 0.05155399069190025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7342273002669808, "compression_loss": 0.0, "distillation_loss": 0.1670214980840683, "epoch": 1.04, "learning_rate": 4.7321655288614674e-05, "loss": 0.1685, "step": 1099, "task_loss": 0.18175487220287323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7342542305381352, "compression_loss": 0.0, "distillation_loss": 0.27932503819465637, "epoch": 1.04, "learning_rate": 4.7316854969479314e-05, "loss": 0.271, "step": 1100, "task_loss": 0.1957254558801651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7342811534573183, "compression_loss": 0.0, "distillation_loss": 0.3590747117996216, "epoch": 1.05, "learning_rate": 4.7312050596379764e-05, "loss": 0.3598, "step": 1101, "task_loss": 0.36656391620635986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7343080690255337, "compression_loss": 0.0, "distillation_loss": 0.17354151606559753, "epoch": 1.05, "learning_rate": 4.730724217018877e-05, "loss": 0.1851, "step": 1102, "task_loss": 0.2888186275959015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7343349772437852, "compression_loss": 0.0, "distillation_loss": 0.17269417643547058, "epoch": 1.05, "learning_rate": 4.7302429691779806e-05, "loss": 0.1714, "step": 1103, "task_loss": 0.1596398800611496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7343618781130763, "compression_loss": 0.0, "distillation_loss": 0.23135778307914734, "epoch": 1.05, "learning_rate": 4.729761316202708e-05, "loss": 0.2202, "step": 1104, "task_loss": 0.11965839564800262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7343887716344109, "compression_loss": 0.0, "distillation_loss": 0.24613159894943237, "epoch": 1.05, "learning_rate": 4.729279258180553e-05, "loss": 0.2429, "step": 1105, "task_loss": 0.21396657824516296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7344156578087925, "compression_loss": 0.0, "distillation_loss": 0.34451723098754883, "epoch": 1.05, "learning_rate": 4.7287967951990855e-05, "loss": 0.3342, "step": 1106, "task_loss": 0.2409912496805191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7344425366372249, "compression_loss": 0.0, "distillation_loss": 0.15786179900169373, "epoch": 1.05, "learning_rate": 4.7283139273459445e-05, "loss": 0.1565, "step": 1107, "task_loss": 0.14464277029037476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7344694081207117, "compression_loss": 0.0, "distillation_loss": 0.18467701971530914, "epoch": 1.05, "learning_rate": 4.727830654708848e-05, "loss": 0.1777, "step": 1108, "task_loss": 0.11510906368494034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7344962722602567, "compression_loss": 0.0, "distillation_loss": 0.22931762039661407, "epoch": 1.05, "learning_rate": 4.727346977375584e-05, "loss": 0.22, "step": 1109, "task_loss": 0.13630938529968262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7345231290568635, "compression_loss": 0.0, "distillation_loss": 0.35741716623306274, "epoch": 1.05, "learning_rate": 4.7268628954340136e-05, "loss": 0.3397, "step": 1110, "task_loss": 0.18016816675662994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7345499785115357, "compression_loss": 0.0, "distillation_loss": 0.24529217183589935, "epoch": 1.06, "learning_rate": 4.726378408972074e-05, "loss": 0.238, "step": 1111, "task_loss": 0.1728375256061554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7345768206252772, "compression_loss": 0.0, "distillation_loss": 0.29263418912887573, "epoch": 1.06, "learning_rate": 4.725893518077774e-05, "loss": 0.2744, "step": 1112, "task_loss": 0.11022615432739258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7346036553990914, "compression_loss": 0.0, "distillation_loss": 0.12653577327728271, "epoch": 1.06, "learning_rate": 4.725408222839197e-05, "loss": 0.1174, "step": 1113, "task_loss": 0.034942492842674255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7346304828339822, "compression_loss": 0.0, "distillation_loss": 0.37742096185684204, "epoch": 1.06, "learning_rate": 4.724922523344498e-05, "loss": 0.3659, "step": 1114, "task_loss": 0.26208043098449707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7346573029309533, "compression_loss": 0.0, "distillation_loss": 0.40198618173599243, "epoch": 1.06, "learning_rate": 4.724436419681907e-05, "loss": 0.3978, "step": 1115, "task_loss": 0.3604857623577118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7346841156910082, "compression_loss": 0.0, "distillation_loss": 0.24844889342784882, "epoch": 1.06, "learning_rate": 4.723949911939728e-05, "loss": 0.2425, "step": 1116, "task_loss": 0.1891540288925171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7347109211151508, "compression_loss": 0.0, "distillation_loss": 0.5047147274017334, "epoch": 1.06, "learning_rate": 4.723463000206337e-05, "loss": 0.4934, "step": 1117, "task_loss": 0.39201080799102783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7347377192043846, "compression_loss": 0.0, "distillation_loss": 0.1238306388258934, "epoch": 1.06, "learning_rate": 4.722975684570183e-05, "loss": 0.1146, "step": 1118, "task_loss": 0.03139756619930267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7347645099597133, "compression_loss": 0.0, "distillation_loss": 0.23237791657447815, "epoch": 1.06, "learning_rate": 4.7224879651197905e-05, "loss": 0.2398, "step": 1119, "task_loss": 0.30690667033195496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7347912933821407, "compression_loss": 0.0, "distillation_loss": 0.20782580971717834, "epoch": 1.06, "learning_rate": 4.721999841943755e-05, "loss": 0.2096, "step": 1120, "task_loss": 0.22543630003929138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7348180694726704, "compression_loss": 0.0, "distillation_loss": 0.18192127346992493, "epoch": 1.06, "learning_rate": 4.721511315130747e-05, "loss": 0.1793, "step": 1121, "task_loss": 0.15596555173397064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7348448382323061, "compression_loss": 0.0, "distillation_loss": 0.14186906814575195, "epoch": 1.07, "learning_rate": 4.7210223847695104e-05, "loss": 0.1417, "step": 1122, "task_loss": 0.14012937247753143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7348715996620514, "compression_loss": 0.0, "distillation_loss": 0.33551573753356934, "epoch": 1.07, "learning_rate": 4.72053305094886e-05, "loss": 0.3328, "step": 1123, "task_loss": 0.3087965250015259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.73489835376291, "compression_loss": 0.0, "distillation_loss": 0.16018086671829224, "epoch": 1.07, "learning_rate": 4.720043313757687e-05, "loss": 0.1579, "step": 1124, "task_loss": 0.13755735754966736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7349251005358858, "compression_loss": 0.0, "distillation_loss": 0.0789952278137207, "epoch": 1.07, "learning_rate": 4.719553173284955e-05, "loss": 0.0733, "step": 1125, "task_loss": 0.021938461810350418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7349518399819822, "compression_loss": 0.0, "distillation_loss": 0.1255275011062622, "epoch": 1.07, "learning_rate": 4.719062629619699e-05, "loss": 0.1193, "step": 1126, "task_loss": 0.06374667584896088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7349785721022031, "compression_loss": 0.0, "distillation_loss": 0.14572109282016754, "epoch": 1.07, "learning_rate": 4.71857168285103e-05, "loss": 0.1458, "step": 1127, "task_loss": 0.14671580493450165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.735005296897552, "compression_loss": 0.0, "distillation_loss": 0.18587951362133026, "epoch": 1.07, "learning_rate": 4.718080333068129e-05, "loss": 0.186, "step": 1128, "task_loss": 0.18705090880393982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7350320143690328, "compression_loss": 0.0, "distillation_loss": 0.31489160656929016, "epoch": 1.07, "learning_rate": 4.717588580360253e-05, "loss": 0.2983, "step": 1129, "task_loss": 0.14935556054115295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7350587245176489, "compression_loss": 0.0, "distillation_loss": 0.2369147539138794, "epoch": 1.07, "learning_rate": 4.717096424816731e-05, "loss": 0.2477, "step": 1130, "task_loss": 0.3448143005371094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7350854273444042, "compression_loss": 0.0, "distillation_loss": 0.24669213593006134, "epoch": 1.07, "learning_rate": 4.716603866526967e-05, "loss": 0.2424, "step": 1131, "task_loss": 0.20330274105072021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7351121228503023, "compression_loss": 0.0, "distillation_loss": 0.29487940669059753, "epoch": 1.08, "learning_rate": 4.7161109055804356e-05, "loss": 0.286, "step": 1132, "task_loss": 0.20643703639507294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7351388110363469, "compression_loss": 0.0, "distillation_loss": 0.35977235436439514, "epoch": 1.08, "learning_rate": 4.7156175420666844e-05, "loss": 0.3461, "step": 1133, "task_loss": 0.22308805584907532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7351654919035416, "compression_loss": 0.0, "distillation_loss": 0.21201397478580475, "epoch": 1.08, "learning_rate": 4.715123776075336e-05, "loss": 0.2005, "step": 1134, "task_loss": 0.09711904078722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7351921654528903, "compression_loss": 0.0, "distillation_loss": 0.16644607484340668, "epoch": 1.08, "learning_rate": 4.714629607696086e-05, "loss": 0.1694, "step": 1135, "task_loss": 0.19630329310894012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7352188316853965, "compression_loss": 0.0, "distillation_loss": 0.06290404498577118, "epoch": 1.08, "learning_rate": 4.714135037018702e-05, "loss": 0.0586, "step": 1136, "task_loss": 0.01941380277276039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7352454906020639, "compression_loss": 0.0, "distillation_loss": 0.2712949216365814, "epoch": 1.08, "learning_rate": 4.713640064133025e-05, "loss": 0.2624, "step": 1137, "task_loss": 0.18186871707439423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7352721422038961, "compression_loss": 0.0, "distillation_loss": 0.11187370866537094, "epoch": 1.08, "learning_rate": 4.7131446891289694e-05, "loss": 0.1134, "step": 1138, "task_loss": 0.12678144872188568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7352987864918971, "compression_loss": 0.0, "distillation_loss": 0.1483362764120102, "epoch": 1.08, "learning_rate": 4.712648912096522e-05, "loss": 0.1437, "step": 1139, "task_loss": 0.10216772556304932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7353254234670703, "compression_loss": 0.0, "distillation_loss": 0.204280823469162, "epoch": 1.08, "learning_rate": 4.712152733125744e-05, "loss": 0.2035, "step": 1140, "task_loss": 0.196111261844635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7353520531304194, "compression_loss": 0.0, "distillation_loss": 0.18241867423057556, "epoch": 1.08, "learning_rate": 4.711656152306768e-05, "loss": 0.1709, "step": 1141, "task_loss": 0.06716363877058029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7353786754829481, "compression_loss": 0.0, "distillation_loss": 0.18135663866996765, "epoch": 1.08, "learning_rate": 4.711159169729801e-05, "loss": 0.1737, "step": 1142, "task_loss": 0.10434912145137787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7354052905256602, "compression_loss": 0.0, "distillation_loss": 0.16455084085464478, "epoch": 1.09, "learning_rate": 4.710661785485121e-05, "loss": 0.1577, "step": 1143, "task_loss": 0.09606322646141052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7354318982595592, "compression_loss": 0.0, "distillation_loss": 0.18896543979644775, "epoch": 1.09, "learning_rate": 4.710163999663081e-05, "loss": 0.1757, "step": 1144, "task_loss": 0.05640107020735741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7354584986856489, "compression_loss": 0.0, "distillation_loss": 0.21004149317741394, "epoch": 1.09, "learning_rate": 4.709665812354107e-05, "loss": 0.2006, "step": 1145, "task_loss": 0.11531868577003479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7354850918049332, "compression_loss": 0.0, "distillation_loss": 0.06717594712972641, "epoch": 1.09, "learning_rate": 4.709167223648695e-05, "loss": 0.0618, "step": 1146, "task_loss": 0.013451127335429192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7355116776184153, "compression_loss": 0.0, "distillation_loss": 0.06622839719057083, "epoch": 1.09, "learning_rate": 4.7086682336374187e-05, "loss": 0.0771, "step": 1147, "task_loss": 0.17544050514698029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7355382561270991, "compression_loss": 0.0, "distillation_loss": 0.42941948771476746, "epoch": 1.09, "learning_rate": 4.70816884241092e-05, "loss": 0.4142, "step": 1148, "task_loss": 0.2768740653991699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7355648273319885, "compression_loss": 0.0, "distillation_loss": 0.3746296763420105, "epoch": 1.09, "learning_rate": 4.7076690500599164e-05, "loss": 0.3572, "step": 1149, "task_loss": 0.20025447010993958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7355913912340868, "compression_loss": 0.0, "distillation_loss": 0.1592552214860916, "epoch": 1.09, "learning_rate": 4.707168856675198e-05, "loss": 0.1792, "step": 1150, "task_loss": 0.3586212992668152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.735617947834398, "compression_loss": 0.0, "distillation_loss": 0.16237607598304749, "epoch": 1.09, "learning_rate": 4.7066682623476265e-05, "loss": 0.1536, "step": 1151, "task_loss": 0.07469609379768372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7356444971339255, "compression_loss": 0.0, "distillation_loss": 0.18786782026290894, "epoch": 1.09, "learning_rate": 4.706167267168138e-05, "loss": 0.1743, "step": 1152, "task_loss": 0.05179518088698387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7356710391336734, "compression_loss": 0.0, "distillation_loss": 0.41019895672798157, "epoch": 1.09, "learning_rate": 4.70566587122774e-05, "loss": 0.4003, "step": 1153, "task_loss": 0.311697781085968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.735697573834645, "compression_loss": 0.0, "distillation_loss": 0.27642178535461426, "epoch": 1.1, "learning_rate": 4.7051640746175147e-05, "loss": 0.2717, "step": 1154, "task_loss": 0.22955429553985596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.735724101237844, "compression_loss": 0.0, "distillation_loss": 0.41415461897850037, "epoch": 1.1, "learning_rate": 4.7046618774286146e-05, "loss": 0.3986, "step": 1155, "task_loss": 0.2587291896343231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7357506213442743, "compression_loss": 0.0, "distillation_loss": 0.10836180299520493, "epoch": 1.1, "learning_rate": 4.7041592797522664e-05, "loss": 0.1089, "step": 1156, "task_loss": 0.11396181583404541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7357771341549394, "compression_loss": 0.0, "distillation_loss": 0.3199521601200104, "epoch": 1.1, "learning_rate": 4.7036562816797705e-05, "loss": 0.3084, "step": 1157, "task_loss": 0.2041257917881012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7358036396708431, "compression_loss": 0.0, "distillation_loss": 0.26940402388572693, "epoch": 1.1, "learning_rate": 4.7031528833024976e-05, "loss": 0.2594, "step": 1158, "task_loss": 0.16965427994728088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7358301378929889, "compression_loss": 0.0, "distillation_loss": 0.15760007500648499, "epoch": 1.1, "learning_rate": 4.702649084711892e-05, "loss": 0.167, "step": 1159, "task_loss": 0.2519673705101013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7358566288223808, "compression_loss": 0.0, "distillation_loss": 0.44500085711479187, "epoch": 1.1, "learning_rate": 4.7021448859994735e-05, "loss": 0.4191, "step": 1160, "task_loss": 0.1861766129732132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7358831124600221, "compression_loss": 0.0, "distillation_loss": 0.10013365000486374, "epoch": 1.1, "learning_rate": 4.70164028725683e-05, "loss": 0.093, "step": 1161, "task_loss": 0.029256567358970642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7359095888069168, "compression_loss": 0.0, "distillation_loss": 0.30927249789237976, "epoch": 1.1, "learning_rate": 4.7011352885756255e-05, "loss": 0.2942, "step": 1162, "task_loss": 0.15870612859725952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7359360578640685, "compression_loss": 0.0, "distillation_loss": 0.13697347044944763, "epoch": 1.1, "learning_rate": 4.7006298900475954e-05, "loss": 0.1302, "step": 1163, "task_loss": 0.06875795125961304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7359625196324807, "compression_loss": 0.0, "distillation_loss": 0.37151408195495605, "epoch": 1.11, "learning_rate": 4.7001240917645465e-05, "loss": 0.3618, "step": 1164, "task_loss": 0.27404022216796875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7359889741131573, "compression_loss": 0.0, "distillation_loss": 0.08429434895515442, "epoch": 1.11, "learning_rate": 4.699617893818361e-05, "loss": 0.079, "step": 1165, "task_loss": 0.031680069863796234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.736015421307102, "compression_loss": 0.0, "distillation_loss": 0.20043613016605377, "epoch": 1.11, "learning_rate": 4.699111296300992e-05, "loss": 0.1903, "step": 1166, "task_loss": 0.09880296885967255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7360418612153182, "compression_loss": 0.0, "distillation_loss": 0.3577537536621094, "epoch": 1.11, "learning_rate": 4.6986042993044645e-05, "loss": 0.344, "step": 1167, "task_loss": 0.220191091299057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7360682938388099, "compression_loss": 0.0, "distillation_loss": 0.3422430455684662, "epoch": 1.11, "learning_rate": 4.698096902920877e-05, "loss": 0.3224, "step": 1168, "task_loss": 0.1436607986688614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7360947191785806, "compression_loss": 0.0, "distillation_loss": 0.20955124497413635, "epoch": 1.11, "learning_rate": 4.6975891072424015e-05, "loss": 0.2027, "step": 1169, "task_loss": 0.14070504903793335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7361211372356341, "compression_loss": 0.0, "distillation_loss": 0.16709572076797485, "epoch": 1.11, "learning_rate": 4.697080912361281e-05, "loss": 0.1657, "step": 1170, "task_loss": 0.15344390273094177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7361475480109739, "compression_loss": 0.0, "distillation_loss": 0.1173921525478363, "epoch": 1.11, "learning_rate": 4.696572318369831e-05, "loss": 0.1092, "step": 1171, "task_loss": 0.035038650035858154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7361739515056039, "compression_loss": 0.0, "distillation_loss": 0.09274931252002716, "epoch": 1.11, "learning_rate": 4.696063325360441e-05, "loss": 0.0872, "step": 1172, "task_loss": 0.03688472509384155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7362003477205277, "compression_loss": 0.0, "distillation_loss": 0.19270846247673035, "epoch": 1.11, "learning_rate": 4.6955539334255716e-05, "loss": 0.1832, "step": 1173, "task_loss": 0.09803463518619537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7362267366567488, "compression_loss": 0.0, "distillation_loss": 0.07251520454883575, "epoch": 1.11, "learning_rate": 4.6950441426577565e-05, "loss": 0.0787, "step": 1174, "task_loss": 0.13475212454795837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7362531183152712, "compression_loss": 0.0, "distillation_loss": 0.3601876199245453, "epoch": 1.12, "learning_rate": 4.694533953149601e-05, "loss": 0.3481, "step": 1175, "task_loss": 0.23886962234973907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7362794926970984, "compression_loss": 0.0, "distillation_loss": 0.24627402424812317, "epoch": 1.12, "learning_rate": 4.694023364993784e-05, "loss": 0.2416, "step": 1176, "task_loss": 0.19923092424869537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.736305859803234, "compression_loss": 0.0, "distillation_loss": 0.1757299304008484, "epoch": 1.12, "learning_rate": 4.693512378283056e-05, "loss": 0.1673, "step": 1177, "task_loss": 0.09126180410385132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7363322196346819, "compression_loss": 0.0, "distillation_loss": 0.127120703458786, "epoch": 1.12, "learning_rate": 4.693000993110241e-05, "loss": 0.1282, "step": 1178, "task_loss": 0.13783586025238037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7363585721924456, "compression_loss": 0.0, "distillation_loss": 0.02453036420047283, "epoch": 1.12, "learning_rate": 4.692489209568234e-05, "loss": 0.0399, "step": 1179, "task_loss": 0.17848730087280273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7363849174775289, "compression_loss": 0.0, "distillation_loss": 0.21834155917167664, "epoch": 1.12, "learning_rate": 4.691977027750002e-05, "loss": 0.2075, "step": 1180, "task_loss": 0.11009831726551056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7364112554909354, "compression_loss": 0.0, "distillation_loss": 0.33225932717323303, "epoch": 1.12, "learning_rate": 4.691464447748587e-05, "loss": 0.3258, "step": 1181, "task_loss": 0.2678440511226654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7364375862336688, "compression_loss": 0.0, "distillation_loss": 0.08998244255781174, "epoch": 1.12, "learning_rate": 4.690951469657101e-05, "loss": 0.1002, "step": 1182, "task_loss": 0.1922500729560852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7364639097067329, "compression_loss": 0.0, "distillation_loss": 0.07570269703865051, "epoch": 1.12, "learning_rate": 4.690438093568728e-05, "loss": 0.071, "step": 1183, "task_loss": 0.028192538768053055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7364902259111312, "compression_loss": 0.0, "distillation_loss": 0.1313639134168625, "epoch": 1.12, "learning_rate": 4.689924319576727e-05, "loss": 0.139, "step": 1184, "task_loss": 0.20730452239513397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7365165348478675, "compression_loss": 0.0, "distillation_loss": 0.2448119819164276, "epoch": 1.13, "learning_rate": 4.689410147774426e-05, "loss": 0.2476, "step": 1185, "task_loss": 0.2722412347793579 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7365428365179455, "compression_loss": 0.0, "distillation_loss": 0.35139238834381104, "epoch": 1.13, "learning_rate": 4.6888955782552274e-05, "loss": 0.3343, "step": 1186, "task_loss": 0.18060128390789032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7365691309223688, "compression_loss": 0.0, "distillation_loss": 0.09512563794851303, "epoch": 1.13, "learning_rate": 4.688380611112605e-05, "loss": 0.0879, "step": 1187, "task_loss": 0.022444602102041245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.736595418062141, "compression_loss": 0.0, "distillation_loss": 0.23818418383598328, "epoch": 1.13, "learning_rate": 4.687865246440106e-05, "loss": 0.2262, "step": 1188, "task_loss": 0.11839476227760315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7366216979382659, "compression_loss": 0.0, "distillation_loss": 0.23918560147285461, "epoch": 1.13, "learning_rate": 4.687349484331347e-05, "loss": 0.2253, "step": 1189, "task_loss": 0.10081011056900024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7366479705517474, "compression_loss": 0.0, "distillation_loss": 0.0738721638917923, "epoch": 1.13, "learning_rate": 4.6868333248800204e-05, "loss": 0.0835, "step": 1190, "task_loss": 0.1698300540447235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7366742359035887, "compression_loss": 0.0, "distillation_loss": 0.32272762060165405, "epoch": 1.13, "learning_rate": 4.686316768179889e-05, "loss": 0.3285, "step": 1191, "task_loss": 0.3799636662006378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7367004939947939, "compression_loss": 0.0, "distillation_loss": 0.2319181114435196, "epoch": 1.13, "learning_rate": 4.685799814324786e-05, "loss": 0.2353, "step": 1192, "task_loss": 0.26583027839660645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7367267448263665, "compression_loss": 0.0, "distillation_loss": 0.17839057743549347, "epoch": 1.13, "learning_rate": 4.685282463408621e-05, "loss": 0.1696, "step": 1193, "task_loss": 0.09003418684005737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7367529883993101, "compression_loss": 0.0, "distillation_loss": 0.05834461748600006, "epoch": 1.13, "learning_rate": 4.6847647155253716e-05, "loss": 0.054, "step": 1194, "task_loss": 0.014876075088977814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7367792247146285, "compression_loss": 0.0, "distillation_loss": 0.09124276041984558, "epoch": 1.13, "learning_rate": 4.684246570769089e-05, "loss": 0.0846, "step": 1195, "task_loss": 0.02449551224708557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7368054537733254, "compression_loss": 0.0, "distillation_loss": 0.21645143628120422, "epoch": 1.14, "learning_rate": 4.683728029233898e-05, "loss": 0.204, "step": 1196, "task_loss": 0.09219798445701599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7368316755764045, "compression_loss": 0.0, "distillation_loss": 0.062870554625988, "epoch": 1.14, "learning_rate": 4.683209091013994e-05, "loss": 0.0685, "step": 1197, "task_loss": 0.11939730495214462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7368578901248695, "compression_loss": 0.0, "distillation_loss": 0.04577638953924179, "epoch": 1.14, "learning_rate": 4.682689756203643e-05, "loss": 0.0508, "step": 1198, "task_loss": 0.0960756242275238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7368840974197238, "compression_loss": 0.0, "distillation_loss": 0.08127100765705109, "epoch": 1.14, "learning_rate": 4.682170024897187e-05, "loss": 0.0913, "step": 1199, "task_loss": 0.1819191575050354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7369102974619715, "compression_loss": 0.0, "distillation_loss": 0.06385953724384308, "epoch": 1.14, "learning_rate": 4.681649897189036e-05, "loss": 0.0621, "step": 1200, "task_loss": 0.04620221257209778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7369364902526159, "compression_loss": 0.0, "distillation_loss": 0.3319193720817566, "epoch": 1.14, "learning_rate": 4.681129373173674e-05, "loss": 0.3153, "step": 1201, "task_loss": 0.16523145139217377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.736962675792661, "compression_loss": 0.0, "distillation_loss": 0.11838692426681519, "epoch": 1.14, "learning_rate": 4.6806084529456574e-05, "loss": 0.1378, "step": 1202, "task_loss": 0.3123231530189514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7369888540831102, "compression_loss": 0.0, "distillation_loss": 0.27343183755874634, "epoch": 1.14, "learning_rate": 4.6800871365996135e-05, "loss": 0.2706, "step": 1203, "task_loss": 0.244797021150589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7370150251249674, "compression_loss": 0.0, "distillation_loss": 0.37955009937286377, "epoch": 1.14, "learning_rate": 4.679565424230241e-05, "loss": 0.3633, "step": 1204, "task_loss": 0.2169659435749054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7370411889192363, "compression_loss": 0.0, "distillation_loss": 0.11844378709793091, "epoch": 1.14, "learning_rate": 4.679043315932313e-05, "loss": 0.1225, "step": 1205, "task_loss": 0.15944825112819672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7370673454669204, "compression_loss": 0.0, "distillation_loss": 0.20653195679187775, "epoch": 1.15, "learning_rate": 4.6785208118006715e-05, "loss": 0.1989, "step": 1206, "task_loss": 0.1306045949459076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7370934947690234, "compression_loss": 0.0, "distillation_loss": 0.16965758800506592, "epoch": 1.15, "learning_rate": 4.677997911930234e-05, "loss": 0.1598, "step": 1207, "task_loss": 0.07105232030153275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7371196368265492, "compression_loss": 0.0, "distillation_loss": 0.3983684182167053, "epoch": 1.15, "learning_rate": 4.6774746164159854e-05, "loss": 0.3717, "step": 1208, "task_loss": 0.13173796236515045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7371457716405012, "compression_loss": 0.0, "distillation_loss": 0.12735429406166077, "epoch": 1.15, "learning_rate": 4.676950925352986e-05, "loss": 0.1323, "step": 1209, "task_loss": 0.17691615223884583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7371718992118833, "compression_loss": 0.0, "distillation_loss": 0.05615377053618431, "epoch": 1.15, "learning_rate": 4.676426838836367e-05, "loss": 0.054, "step": 1210, "task_loss": 0.03485414758324623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7371980195416992, "compression_loss": 0.0, "distillation_loss": 0.10495641827583313, "epoch": 1.15, "learning_rate": 4.675902356961331e-05, "loss": 0.1051, "step": 1211, "task_loss": 0.1060037836432457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7372241326309523, "compression_loss": 0.0, "distillation_loss": 0.04488696902990341, "epoch": 1.15, "learning_rate": 4.675377479823153e-05, "loss": 0.0409, "step": 1212, "task_loss": 0.00522448867559433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7372502384806465, "compression_loss": 0.0, "distillation_loss": 0.15110138058662415, "epoch": 1.15, "learning_rate": 4.6748522075171784e-05, "loss": 0.1397, "step": 1213, "task_loss": 0.03717401623725891 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7372763370917854, "compression_loss": 0.0, "distillation_loss": 0.2912394404411316, "epoch": 1.15, "learning_rate": 4.674326540138826e-05, "loss": 0.2817, "step": 1214, "task_loss": 0.19608113169670105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7373024284653729, "compression_loss": 0.0, "distillation_loss": 0.21200403571128845, "epoch": 1.15, "learning_rate": 4.673800477783587e-05, "loss": 0.2112, "step": 1215, "task_loss": 0.20386581122875214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7373285126024124, "compression_loss": 0.0, "distillation_loss": 0.29864805936813354, "epoch": 1.15, "learning_rate": 4.6732740205470206e-05, "loss": 0.2818, "step": 1216, "task_loss": 0.13044750690460205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7373545895039076, "compression_loss": 0.0, "distillation_loss": 0.12389582395553589, "epoch": 1.16, "learning_rate": 4.672747168524762e-05, "loss": 0.1253, "step": 1217, "task_loss": 0.13781008124351501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7373806591708625, "compression_loss": 0.0, "distillation_loss": 0.04061355069279671, "epoch": 1.16, "learning_rate": 4.672219921812517e-05, "loss": 0.0385, "step": 1218, "task_loss": 0.019226521253585815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7374067216042803, "compression_loss": 0.0, "distillation_loss": 0.11448079347610474, "epoch": 1.16, "learning_rate": 4.671692280506061e-05, "loss": 0.128, "step": 1219, "task_loss": 0.2497006058692932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7374327768051652, "compression_loss": 0.0, "distillation_loss": 0.09137353301048279, "epoch": 1.16, "learning_rate": 4.671164244701243e-05, "loss": 0.0936, "step": 1220, "task_loss": 0.11410032212734222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7374588247745204, "compression_loss": 0.0, "distillation_loss": 0.11541637778282166, "epoch": 1.16, "learning_rate": 4.670635814493984e-05, "loss": 0.1065, "step": 1221, "task_loss": 0.025996150448918343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7374848655133499, "compression_loss": 0.0, "distillation_loss": 0.0969296246767044, "epoch": 1.16, "learning_rate": 4.6701069899802755e-05, "loss": 0.097, "step": 1222, "task_loss": 0.09771576523780823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7375108990226572, "compression_loss": 0.0, "distillation_loss": 0.2986716032028198, "epoch": 1.16, "learning_rate": 4.669577771256181e-05, "loss": 0.2872, "step": 1223, "task_loss": 0.18374918401241302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7375369253034462, "compression_loss": 0.0, "distillation_loss": 0.06657759100198746, "epoch": 1.16, "learning_rate": 4.6690481584178354e-05, "loss": 0.0615, "step": 1224, "task_loss": 0.015912499278783798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7375629443567204, "compression_loss": 0.0, "distillation_loss": 0.20490974187850952, "epoch": 1.16, "learning_rate": 4.6685181515614454e-05, "loss": 0.2002, "step": 1225, "task_loss": 0.15764397382736206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7375889561834835, "compression_loss": 0.0, "distillation_loss": 0.12317511439323425, "epoch": 1.16, "learning_rate": 4.6679877507832895e-05, "loss": 0.1218, "step": 1226, "task_loss": 0.10897918045520782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7376149607847392, "compression_loss": 0.0, "distillation_loss": 0.0427418127655983, "epoch": 1.17, "learning_rate": 4.6674569561797174e-05, "loss": 0.0424, "step": 1227, "task_loss": 0.0389644056558609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7376409581614912, "compression_loss": 0.0, "distillation_loss": 0.14768566191196442, "epoch": 1.17, "learning_rate": 4.666925767847151e-05, "loss": 0.1529, "step": 1228, "task_loss": 0.20031431317329407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7376669483147432, "compression_loss": 0.0, "distillation_loss": 0.09107698500156403, "epoch": 1.17, "learning_rate": 4.6663941858820825e-05, "loss": 0.0864, "step": 1229, "task_loss": 0.04394202679395676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7376929312454988, "compression_loss": 0.0, "distillation_loss": 0.26698851585388184, "epoch": 1.17, "learning_rate": 4.665862210381077e-05, "loss": 0.2655, "step": 1230, "task_loss": 0.2520076036453247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7377189069547618, "compression_loss": 0.0, "distillation_loss": 0.12991863489151, "epoch": 1.17, "learning_rate": 4.66532984144077e-05, "loss": 0.1312, "step": 1231, "task_loss": 0.14297693967819214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7377448754435358, "compression_loss": 0.0, "distillation_loss": 0.22150184214115143, "epoch": 1.17, "learning_rate": 4.6647970791578685e-05, "loss": 0.223, "step": 1232, "task_loss": 0.23616032302379608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7377708367128246, "compression_loss": 0.0, "distillation_loss": 0.32014334201812744, "epoch": 1.17, "learning_rate": 4.664263923629153e-05, "loss": 0.3188, "step": 1233, "task_loss": 0.30650225281715393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7377967907636317, "compression_loss": 0.0, "distillation_loss": 0.485608845949173, "epoch": 1.17, "learning_rate": 4.663730374951472e-05, "loss": 0.4772, "step": 1234, "task_loss": 0.4013437032699585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7378227375969609, "compression_loss": 0.0, "distillation_loss": 0.06522876024246216, "epoch": 1.17, "learning_rate": 4.663196433221747e-05, "loss": 0.0715, "step": 1235, "task_loss": 0.12773452699184418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7378486772138159, "compression_loss": 0.0, "distillation_loss": 0.2558850646018982, "epoch": 1.17, "learning_rate": 4.6626620985369724e-05, "loss": 0.2443, "step": 1236, "task_loss": 0.13986220955848694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7378746096152002, "compression_loss": 0.0, "distillation_loss": 0.07191958278417587, "epoch": 1.17, "learning_rate": 4.662127370994212e-05, "loss": 0.076, "step": 1237, "task_loss": 0.11259147524833679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7379005348021177, "compression_loss": 0.0, "distillation_loss": 0.15925487875938416, "epoch": 1.18, "learning_rate": 4.6615922506906016e-05, "loss": 0.1508, "step": 1238, "task_loss": 0.07494504749774933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7379264527755719, "compression_loss": 0.0, "distillation_loss": 0.216340109705925, "epoch": 1.18, "learning_rate": 4.661056737723349e-05, "loss": 0.2053, "step": 1239, "task_loss": 0.10613324493169785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7379523635365667, "compression_loss": 0.0, "distillation_loss": 0.4159829914569855, "epoch": 1.18, "learning_rate": 4.660520832189732e-05, "loss": 0.4001, "step": 1240, "task_loss": 0.257106214761734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7379782670861056, "compression_loss": 0.0, "distillation_loss": 0.04487886279821396, "epoch": 1.18, "learning_rate": 4.6599845341871005e-05, "loss": 0.0454, "step": 1241, "task_loss": 0.050378091633319855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7380041634251924, "compression_loss": 0.0, "distillation_loss": 0.13262313604354858, "epoch": 1.18, "learning_rate": 4.6594478438128757e-05, "loss": 0.1244, "step": 1242, "task_loss": 0.05053956061601639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7380300525548307, "compression_loss": 0.0, "distillation_loss": 0.06497453153133392, "epoch": 1.18, "learning_rate": 4.6589107611645497e-05, "loss": 0.0752, "step": 1243, "task_loss": 0.16748173534870148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7380559344760241, "compression_loss": 0.0, "distillation_loss": 0.16354522109031677, "epoch": 1.18, "learning_rate": 4.658373286339688e-05, "loss": 0.1567, "step": 1244, "task_loss": 0.09554801136255264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7380818091897765, "compression_loss": 0.0, "distillation_loss": 0.11697202920913696, "epoch": 1.18, "learning_rate": 4.6578354194359227e-05, "loss": 0.1177, "step": 1245, "task_loss": 0.12439711391925812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7381076766970915, "compression_loss": 0.0, "distillation_loss": 0.139469712972641, "epoch": 1.18, "learning_rate": 4.657297160550961e-05, "loss": 0.1305, "step": 1246, "task_loss": 0.04955626651644707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7381335369989727, "compression_loss": 0.0, "distillation_loss": 0.1701054573059082, "epoch": 1.18, "learning_rate": 4.656758509782582e-05, "loss": 0.1753, "step": 1247, "task_loss": 0.22217293083667755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7381593900964238, "compression_loss": 0.0, "distillation_loss": 0.22485214471817017, "epoch": 1.19, "learning_rate": 4.6562194672286306e-05, "loss": 0.2262, "step": 1248, "task_loss": 0.23852680623531342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7381852359904486, "compression_loss": 0.0, "distillation_loss": 0.06678508222103119, "epoch": 1.19, "learning_rate": 4.65568003298703e-05, "loss": 0.0617, "step": 1249, "task_loss": 0.016354495659470558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7382110746820507, "compression_loss": 0.0, "distillation_loss": 0.11139645427465439, "epoch": 1.19, "learning_rate": 4.655140207155769e-05, "loss": 0.1023, "step": 1250, "task_loss": 0.020297054201364517 }, { "epoch": 1.19, "eval_accuracy": 0.8979357798165137, "eval_loss": 0.4085277318954468, "eval_runtime": 18.1765, "eval_samples_per_second": 47.974, "eval_steps_per_second": 5.997, "step": 1250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7382369061722337, "compression_loss": 0.0, "distillation_loss": 0.11869402974843979, "epoch": 1.19, "learning_rate": 4.65459998983291e-05, "loss": 0.1219, "step": 1251, "task_loss": 0.15096434950828552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7382627304620014, "compression_loss": 0.0, "distillation_loss": 0.22661426663398743, "epoch": 1.19, "learning_rate": 4.6540593811165866e-05, "loss": 0.2328, "step": 1252, "task_loss": 0.28801366686820984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7382885475523575, "compression_loss": 0.0, "distillation_loss": 0.03438243269920349, "epoch": 1.19, "learning_rate": 4.653518381105002e-05, "loss": 0.0423, "step": 1253, "task_loss": 0.11402395367622375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7383143574443055, "compression_loss": 0.0, "distillation_loss": 0.03891141712665558, "epoch": 1.19, "learning_rate": 4.6529769898964325e-05, "loss": 0.0358, "step": 1254, "task_loss": 0.007961155846714973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7383401601388494, "compression_loss": 0.0, "distillation_loss": 0.036808304488658905, "epoch": 1.19, "learning_rate": 4.652435207589224e-05, "loss": 0.0468, "step": 1255, "task_loss": 0.1369296759366989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7383659556369926, "compression_loss": 0.0, "distillation_loss": 0.07549430429935455, "epoch": 1.19, "learning_rate": 4.651893034281793e-05, "loss": 0.0702, "step": 1256, "task_loss": 0.02279655635356903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7383917439397388, "compression_loss": 0.0, "distillation_loss": 0.0869937464594841, "epoch": 1.19, "learning_rate": 4.6513504700726293e-05, "loss": 0.0966, "step": 1257, "task_loss": 0.1825573891401291 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7384175250480919, "compression_loss": 0.0, "distillation_loss": 0.1446138620376587, "epoch": 1.19, "learning_rate": 4.650807515060291e-05, "loss": 0.1397, "step": 1258, "task_loss": 0.09526462852954865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7384432989630553, "compression_loss": 0.0, "distillation_loss": 0.15781186521053314, "epoch": 1.2, "learning_rate": 4.650264169343411e-05, "loss": 0.1688, "step": 1259, "task_loss": 0.26753732562065125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7384690656856329, "compression_loss": 0.0, "distillation_loss": 0.06009237468242645, "epoch": 1.2, "learning_rate": 4.6497204330206874e-05, "loss": 0.0683, "step": 1260, "task_loss": 0.14257624745368958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7384948252168283, "compression_loss": 0.0, "distillation_loss": 0.1592351347208023, "epoch": 1.2, "learning_rate": 4.649176306190895e-05, "loss": 0.1589, "step": 1261, "task_loss": 0.15542340278625488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7385205775576451, "compression_loss": 0.0, "distillation_loss": 0.3260449469089508, "epoch": 1.2, "learning_rate": 4.648631788952874e-05, "loss": 0.3149, "step": 1262, "task_loss": 0.21453765034675598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7385463227090872, "compression_loss": 0.0, "distillation_loss": 0.03898075968027115, "epoch": 1.2, "learning_rate": 4.6480868814055424e-05, "loss": 0.0397, "step": 1263, "task_loss": 0.04632849618792534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7385720606721581, "compression_loss": 0.0, "distillation_loss": 0.3080974519252777, "epoch": 1.2, "learning_rate": 4.647541583647883e-05, "loss": 0.2963, "step": 1264, "task_loss": 0.1906224638223648 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7385977914478615, "compression_loss": 0.0, "distillation_loss": 0.1396906077861786, "epoch": 1.2, "learning_rate": 4.646995895778952e-05, "loss": 0.1367, "step": 1265, "task_loss": 0.11023291200399399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7386235150372011, "compression_loss": 0.0, "distillation_loss": 0.16227009892463684, "epoch": 1.2, "learning_rate": 4.646449817897876e-05, "loss": 0.1593, "step": 1266, "task_loss": 0.13288387656211853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7386492314411807, "compression_loss": 0.0, "distillation_loss": 0.18255099654197693, "epoch": 1.2, "learning_rate": 4.645903350103855e-05, "loss": 0.1828, "step": 1267, "task_loss": 0.18547126650810242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7386749406608037, "compression_loss": 0.0, "distillation_loss": 0.13566656410694122, "epoch": 1.2, "learning_rate": 4.6453564924961544e-05, "loss": 0.1234, "step": 1268, "task_loss": 0.012727364897727966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7387006426970741, "compression_loss": 0.0, "distillation_loss": 0.10819855332374573, "epoch": 1.21, "learning_rate": 4.644809245174114e-05, "loss": 0.1009, "step": 1269, "task_loss": 0.034790411591529846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7387263375509955, "compression_loss": 0.0, "distillation_loss": 0.2560357451438904, "epoch": 1.21, "learning_rate": 4.6442616082371466e-05, "loss": 0.2434, "step": 1270, "task_loss": 0.12986107170581818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7387520252235714, "compression_loss": 0.0, "distillation_loss": 0.06887871772050858, "epoch": 1.21, "learning_rate": 4.64371358178473e-05, "loss": 0.0706, "step": 1271, "task_loss": 0.08568105101585388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7387777057158056, "compression_loss": 0.0, "distillation_loss": 0.20982679724693298, "epoch": 1.21, "learning_rate": 4.6431651659164174e-05, "loss": 0.2139, "step": 1272, "task_loss": 0.25017955899238586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7388033790287019, "compression_loss": 0.0, "distillation_loss": 0.2947445511817932, "epoch": 1.21, "learning_rate": 4.6426163607318305e-05, "loss": 0.2797, "step": 1273, "task_loss": 0.14412230253219604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7388290451632638, "compression_loss": 0.0, "distillation_loss": 0.10616719722747803, "epoch": 1.21, "learning_rate": 4.642067166330663e-05, "loss": 0.1096, "step": 1274, "task_loss": 0.1407913863658905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.738854704120495, "compression_loss": 0.0, "distillation_loss": 0.262935608625412, "epoch": 1.21, "learning_rate": 4.6415175828126786e-05, "loss": 0.2522, "step": 1275, "task_loss": 0.15595991909503937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7388803559013993, "compression_loss": 0.0, "distillation_loss": 0.08681028336286545, "epoch": 1.21, "learning_rate": 4.640967610277711e-05, "loss": 0.0923, "step": 1276, "task_loss": 0.14123068749904633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7389060005069803, "compression_loss": 0.0, "distillation_loss": 0.2555930018424988, "epoch": 1.21, "learning_rate": 4.640417248825667e-05, "loss": 0.2431, "step": 1277, "task_loss": 0.13036002218723297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7389316379382417, "compression_loss": 0.0, "distillation_loss": 0.03256315737962723, "epoch": 1.21, "learning_rate": 4.63986649855652e-05, "loss": 0.0301, "step": 1278, "task_loss": 0.007671518251299858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7389572681961872, "compression_loss": 0.0, "distillation_loss": 0.06931599974632263, "epoch": 1.21, "learning_rate": 4.639315359570319e-05, "loss": 0.0782, "step": 1279, "task_loss": 0.1577843278646469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7389828912818205, "compression_loss": 0.0, "distillation_loss": 0.09818733483552933, "epoch": 1.22, "learning_rate": 4.6387638319671786e-05, "loss": 0.1179, "step": 1280, "task_loss": 0.2952803671360016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7390085071961452, "compression_loss": 0.0, "distillation_loss": 0.055818259716033936, "epoch": 1.22, "learning_rate": 4.6382119158472895e-05, "loss": 0.0511, "step": 1281, "task_loss": 0.009016238152980804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.739034115940165, "compression_loss": 0.0, "distillation_loss": 0.283183753490448, "epoch": 1.22, "learning_rate": 4.637659611310907e-05, "loss": 0.2704, "step": 1282, "task_loss": 0.15582668781280518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7390597175148836, "compression_loss": 0.0, "distillation_loss": 0.08733191341161728, "epoch": 1.22, "learning_rate": 4.637106918458361e-05, "loss": 0.0902, "step": 1283, "task_loss": 0.11557549238204956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7390853119213048, "compression_loss": 0.0, "distillation_loss": 0.05915270373225212, "epoch": 1.22, "learning_rate": 4.636553837390051e-05, "loss": 0.0552, "step": 1284, "task_loss": 0.01949235051870346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.739110899160432, "compression_loss": 0.0, "distillation_loss": 0.22277674078941345, "epoch": 1.22, "learning_rate": 4.636000368206447e-05, "loss": 0.2188, "step": 1285, "task_loss": 0.18264567852020264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7391364792332692, "compression_loss": 0.0, "distillation_loss": 0.07782046496868134, "epoch": 1.22, "learning_rate": 4.6354465110080885e-05, "loss": 0.0838, "step": 1286, "task_loss": 0.1375577300786972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7391620521408199, "compression_loss": 0.0, "distillation_loss": 0.1343613713979721, "epoch": 1.22, "learning_rate": 4.6348922658955874e-05, "loss": 0.1423, "step": 1287, "task_loss": 0.2140950858592987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7391876178840878, "compression_loss": 0.0, "distillation_loss": 0.24139809608459473, "epoch": 1.22, "learning_rate": 4.634337632969624e-05, "loss": 0.2376, "step": 1288, "task_loss": 0.20292049646377563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7392131764640766, "compression_loss": 0.0, "distillation_loss": 0.1293402463197708, "epoch": 1.22, "learning_rate": 4.6337826123309505e-05, "loss": 0.123, "step": 1289, "task_loss": 0.06586703658103943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.73923872788179, "compression_loss": 0.0, "distillation_loss": 0.09735876321792603, "epoch": 1.23, "learning_rate": 4.6332272040803895e-05, "loss": 0.1097, "step": 1290, "task_loss": 0.22072833776474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7392642721382316, "compression_loss": 0.0, "distillation_loss": 0.30776453018188477, "epoch": 1.23, "learning_rate": 4.632671408318833e-05, "loss": 0.2998, "step": 1291, "task_loss": 0.22781196236610413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7392898092344052, "compression_loss": 0.0, "distillation_loss": 0.13507933914661407, "epoch": 1.23, "learning_rate": 4.6321152251472435e-05, "loss": 0.1333, "step": 1292, "task_loss": 0.11699161678552628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7393153391713145, "compression_loss": 0.0, "distillation_loss": 0.30968376994132996, "epoch": 1.23, "learning_rate": 4.6315586546666556e-05, "loss": 0.3044, "step": 1293, "task_loss": 0.2563600540161133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7393408619499631, "compression_loss": 0.0, "distillation_loss": 0.15666070580482483, "epoch": 1.23, "learning_rate": 4.631001696978172e-05, "loss": 0.1524, "step": 1294, "task_loss": 0.11410398036241531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7393663775713546, "compression_loss": 0.0, "distillation_loss": 0.21059119701385498, "epoch": 1.23, "learning_rate": 4.630444352182968e-05, "loss": 0.1988, "step": 1295, "task_loss": 0.09313502162694931 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7393918860364929, "compression_loss": 0.0, "distillation_loss": 0.2058752477169037, "epoch": 1.23, "learning_rate": 4.6298866203822865e-05, "loss": 0.1976, "step": 1296, "task_loss": 0.12271516025066376 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7394173873463814, "compression_loss": 0.0, "distillation_loss": 0.03244621679186821, "epoch": 1.23, "learning_rate": 4.629328501677442e-05, "loss": 0.0299, "step": 1297, "task_loss": 0.007005665451288223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7394428815020241, "compression_loss": 0.0, "distillation_loss": 0.05955211818218231, "epoch": 1.23, "learning_rate": 4.6287699961698214e-05, "loss": 0.0587, "step": 1298, "task_loss": 0.050543755292892456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7394683685044244, "compression_loss": 0.0, "distillation_loss": 0.10339696705341339, "epoch": 1.23, "learning_rate": 4.6282111039608784e-05, "loss": 0.1048, "step": 1299, "task_loss": 0.1170087605714798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7394938483545862, "compression_loss": 0.0, "distillation_loss": 0.13691368699073792, "epoch": 1.23, "learning_rate": 4.6276518251521384e-05, "loss": 0.148, "step": 1300, "task_loss": 0.24737101793289185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.739519321053513, "compression_loss": 0.0, "distillation_loss": 0.2226058393716812, "epoch": 1.24, "learning_rate": 4.6270921598451974e-05, "loss": 0.2199, "step": 1301, "task_loss": 0.19590626657009125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7395447866022086, "compression_loss": 0.0, "distillation_loss": 0.25497111678123474, "epoch": 1.24, "learning_rate": 4.6265321081417223e-05, "loss": 0.2433, "step": 1302, "task_loss": 0.13829627633094788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7395702450016767, "compression_loss": 0.0, "distillation_loss": 0.20901359617710114, "epoch": 1.24, "learning_rate": 4.625971670143447e-05, "loss": 0.2123, "step": 1303, "task_loss": 0.242196723818779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7395956962529209, "compression_loss": 0.0, "distillation_loss": 0.08364978432655334, "epoch": 1.24, "learning_rate": 4.625410845952181e-05, "loss": 0.0823, "step": 1304, "task_loss": 0.07054957747459412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7396211403569448, "compression_loss": 0.0, "distillation_loss": 0.18630865216255188, "epoch": 1.24, "learning_rate": 4.6248496356697966e-05, "loss": 0.1769, "step": 1305, "task_loss": 0.09224953502416611 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7396465773147524, "compression_loss": 0.0, "distillation_loss": 0.2079707384109497, "epoch": 1.24, "learning_rate": 4.6242880393982436e-05, "loss": 0.1995, "step": 1306, "task_loss": 0.12356877326965332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.739672007127347, "compression_loss": 0.0, "distillation_loss": 0.2150033861398697, "epoch": 1.24, "learning_rate": 4.623726057239537e-05, "loss": 0.2303, "step": 1307, "task_loss": 0.3683556020259857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7396974297957326, "compression_loss": 0.0, "distillation_loss": 0.21252375841140747, "epoch": 1.24, "learning_rate": 4.623163689295764e-05, "loss": 0.2039, "step": 1308, "task_loss": 0.12652674317359924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7397228453209127, "compression_loss": 0.0, "distillation_loss": 0.21421362459659576, "epoch": 1.24, "learning_rate": 4.6226009356690825e-05, "loss": 0.2026, "step": 1309, "task_loss": 0.09814447164535522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7397482537038911, "compression_loss": 0.0, "distillation_loss": 0.20858995616436005, "epoch": 1.24, "learning_rate": 4.622037796461718e-05, "loss": 0.198, "step": 1310, "task_loss": 0.10299951583147049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7397736549456713, "compression_loss": 0.0, "distillation_loss": 0.2604760527610779, "epoch": 1.25, "learning_rate": 4.621474271775968e-05, "loss": 0.2562, "step": 1311, "task_loss": 0.21733561158180237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.739799049047257, "compression_loss": 0.0, "distillation_loss": 0.07183189690113068, "epoch": 1.25, "learning_rate": 4.620910361714199e-05, "loss": 0.068, "step": 1312, "task_loss": 0.03307514637708664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7398244360096521, "compression_loss": 0.0, "distillation_loss": 0.19191870093345642, "epoch": 1.25, "learning_rate": 4.620346066378849e-05, "loss": 0.1822, "step": 1313, "task_loss": 0.09521719813346863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7398498158338602, "compression_loss": 0.0, "distillation_loss": 0.1862865388393402, "epoch": 1.25, "learning_rate": 4.619781385872424e-05, "loss": 0.1838, "step": 1314, "task_loss": 0.16108401119709015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7398751885208849, "compression_loss": 0.0, "distillation_loss": 0.28807079792022705, "epoch": 1.25, "learning_rate": 4.6192163202975013e-05, "loss": 0.2742, "step": 1315, "task_loss": 0.14911451935768127 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7399005540717298, "compression_loss": 0.0, "distillation_loss": 0.07601656764745712, "epoch": 1.25, "learning_rate": 4.618650869756728e-05, "loss": 0.071, "step": 1316, "task_loss": 0.025538241490721703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7399259124873989, "compression_loss": 0.0, "distillation_loss": 0.08642975986003876, "epoch": 1.25, "learning_rate": 4.6180850343528205e-05, "loss": 0.0799, "step": 1317, "task_loss": 0.021204736083745956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7399512637688955, "compression_loss": 0.0, "distillation_loss": 0.10312186181545258, "epoch": 1.25, "learning_rate": 4.617518814188565e-05, "loss": 0.0966, "step": 1318, "task_loss": 0.037991687655448914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7399766079172236, "compression_loss": 0.0, "distillation_loss": 0.2704732418060303, "epoch": 1.25, "learning_rate": 4.6169522093668196e-05, "loss": 0.2797, "step": 1319, "task_loss": 0.3626573085784912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7400019449333867, "compression_loss": 0.0, "distillation_loss": 0.10321257263422012, "epoch": 1.25, "learning_rate": 4.61638521999051e-05, "loss": 0.0959, "step": 1320, "task_loss": 0.030156267806887627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7400272748183885, "compression_loss": 0.0, "distillation_loss": 0.5204591751098633, "epoch": 1.25, "learning_rate": 4.6158178461626323e-05, "loss": 0.5081, "step": 1321, "task_loss": 0.3965243101119995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7400525975732327, "compression_loss": 0.0, "distillation_loss": 0.23917892575263977, "epoch": 1.26, "learning_rate": 4.615250087986254e-05, "loss": 0.2394, "step": 1322, "task_loss": 0.24161620438098907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.740077913198923, "compression_loss": 0.0, "distillation_loss": 0.09203709661960602, "epoch": 1.26, "learning_rate": 4.6146819455645086e-05, "loss": 0.0865, "step": 1323, "task_loss": 0.037142425775527954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7401032216964631, "compression_loss": 0.0, "distillation_loss": 0.10284596681594849, "epoch": 1.26, "learning_rate": 4.614113419000604e-05, "loss": 0.1036, "step": 1324, "task_loss": 0.11075641214847565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7401285230668567, "compression_loss": 0.0, "distillation_loss": 0.2411002814769745, "epoch": 1.26, "learning_rate": 4.613544508397815e-05, "loss": 0.2294, "step": 1325, "task_loss": 0.12409268319606781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7401538173111074, "compression_loss": 0.0, "distillation_loss": 0.08729246258735657, "epoch": 1.26, "learning_rate": 4.6129752138594874e-05, "loss": 0.0873, "step": 1326, "task_loss": 0.0873517096042633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.740179104430219, "compression_loss": 0.0, "distillation_loss": 0.13888351619243622, "epoch": 1.26, "learning_rate": 4.612405535489036e-05, "loss": 0.1341, "step": 1327, "task_loss": 0.09130087494850159 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.740204384425195, "compression_loss": 0.0, "distillation_loss": 0.1486589014530182, "epoch": 1.26, "learning_rate": 4.611835473389945e-05, "loss": 0.148, "step": 1328, "task_loss": 0.14163073897361755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7402296572970392, "compression_loss": 0.0, "distillation_loss": 0.352372944355011, "epoch": 1.26, "learning_rate": 4.61126502766577e-05, "loss": 0.3364, "step": 1329, "task_loss": 0.192458838224411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7402549230467552, "compression_loss": 0.0, "distillation_loss": 0.09347756952047348, "epoch": 1.26, "learning_rate": 4.6106941984201344e-05, "loss": 0.1093, "step": 1330, "task_loss": 0.251775860786438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7402801816753469, "compression_loss": 0.0, "distillation_loss": 0.16438814997673035, "epoch": 1.26, "learning_rate": 4.610122985756733e-05, "loss": 0.158, "step": 1331, "task_loss": 0.10030940920114517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7403054331838178, "compression_loss": 0.0, "distillation_loss": 0.186666801571846, "epoch": 1.26, "learning_rate": 4.609551389779328e-05, "loss": 0.1849, "step": 1332, "task_loss": 0.16852930188179016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7403306775731715, "compression_loss": 0.0, "distillation_loss": 0.0614393912255764, "epoch": 1.27, "learning_rate": 4.6089794105917544e-05, "loss": 0.063, "step": 1333, "task_loss": 0.0771103948354721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7403559148444119, "compression_loss": 0.0, "distillation_loss": 0.28277117013931274, "epoch": 1.27, "learning_rate": 4.6084070482979135e-05, "loss": 0.2787, "step": 1334, "task_loss": 0.24248361587524414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7403811449985426, "compression_loss": 0.0, "distillation_loss": 0.24766165018081665, "epoch": 1.27, "learning_rate": 4.607834303001778e-05, "loss": 0.2406, "step": 1335, "task_loss": 0.17734551429748535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7404063680365671, "compression_loss": 0.0, "distillation_loss": 0.21882733702659607, "epoch": 1.27, "learning_rate": 4.60726117480739e-05, "loss": 0.2158, "step": 1336, "task_loss": 0.18877999484539032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7404315839594894, "compression_loss": 0.0, "distillation_loss": 0.1295139193534851, "epoch": 1.27, "learning_rate": 4.6066876638188604e-05, "loss": 0.1317, "step": 1337, "task_loss": 0.151097372174263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.740456792768313, "compression_loss": 0.0, "distillation_loss": 0.199735626578331, "epoch": 1.27, "learning_rate": 4.606113770140371e-05, "loss": 0.1898, "step": 1338, "task_loss": 0.10075341165065765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7404819944640416, "compression_loss": 0.0, "distillation_loss": 0.2485513985157013, "epoch": 1.27, "learning_rate": 4.605539493876173e-05, "loss": 0.2438, "step": 1339, "task_loss": 0.20112858712673187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7405071890476789, "compression_loss": 0.0, "distillation_loss": 0.2236090898513794, "epoch": 1.27, "learning_rate": 4.604964835130585e-05, "loss": 0.2099, "step": 1340, "task_loss": 0.08700668066740036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7405323765202285, "compression_loss": 0.0, "distillation_loss": 0.062271565198898315, "epoch": 1.27, "learning_rate": 4.6043897940079964e-05, "loss": 0.0583, "step": 1341, "task_loss": 0.022575678303837776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7405575568826942, "compression_loss": 0.0, "distillation_loss": 0.08404142409563065, "epoch": 1.27, "learning_rate": 4.603814370612867e-05, "loss": 0.078, "step": 1342, "task_loss": 0.02370348386466503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7405827301360796, "compression_loss": 0.0, "distillation_loss": 0.2024337202310562, "epoch": 1.28, "learning_rate": 4.603238565049726e-05, "loss": 0.1899, "step": 1343, "task_loss": 0.07661712914705276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7406078962813885, "compression_loss": 0.0, "distillation_loss": 0.29442059993743896, "epoch": 1.28, "learning_rate": 4.60266237742317e-05, "loss": 0.279, "step": 1344, "task_loss": 0.1398637890815735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7406330553196244, "compression_loss": 0.0, "distillation_loss": 0.03961509093642235, "epoch": 1.28, "learning_rate": 4.602085807837866e-05, "loss": 0.0527, "step": 1345, "task_loss": 0.1707422137260437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7406582072517911, "compression_loss": 0.0, "distillation_loss": 0.3172229826450348, "epoch": 1.28, "learning_rate": 4.601508856398552e-05, "loss": 0.3057, "step": 1346, "task_loss": 0.20200252532958984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7406833520788924, "compression_loss": 0.0, "distillation_loss": 0.1311604082584381, "epoch": 1.28, "learning_rate": 4.6009315232100324e-05, "loss": 0.1316, "step": 1347, "task_loss": 0.13595682382583618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7407084898019316, "compression_loss": 0.0, "distillation_loss": 0.08850497007369995, "epoch": 1.28, "learning_rate": 4.600353808377184e-05, "loss": 0.0874, "step": 1348, "task_loss": 0.07742930948734283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7407336204219128, "compression_loss": 0.0, "distillation_loss": 0.03612298145890236, "epoch": 1.28, "learning_rate": 4.599775712004951e-05, "loss": 0.0333, "step": 1349, "task_loss": 0.007720911875367165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7407587439398395, "compression_loss": 0.0, "distillation_loss": 0.552012026309967, "epoch": 1.28, "learning_rate": 4.599197234198347e-05, "loss": 0.5405, "step": 1350, "task_loss": 0.4371148347854614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7407838603567154, "compression_loss": 0.0, "distillation_loss": 0.07328462600708008, "epoch": 1.28, "learning_rate": 4.5986183750624555e-05, "loss": 0.0754, "step": 1351, "task_loss": 0.09475519508123398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7408089696735441, "compression_loss": 0.0, "distillation_loss": 0.2518464922904968, "epoch": 1.28, "learning_rate": 4.5980391347024296e-05, "loss": 0.2416, "step": 1352, "task_loss": 0.14951427280902863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7408340718913294, "compression_loss": 0.0, "distillation_loss": 0.28138667345046997, "epoch": 1.28, "learning_rate": 4.59745951322349e-05, "loss": 0.2688, "step": 1353, "task_loss": 0.15557241439819336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7408591670110749, "compression_loss": 0.0, "distillation_loss": 0.11055044829845428, "epoch": 1.29, "learning_rate": 4.596879510730929e-05, "loss": 0.1117, "step": 1354, "task_loss": 0.12174604833126068 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7408842550337844, "compression_loss": 0.0, "distillation_loss": 0.0641191229224205, "epoch": 1.29, "learning_rate": 4.596299127330106e-05, "loss": 0.0583, "step": 1355, "task_loss": 0.0056402478367090225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7409093359604615, "compression_loss": 0.0, "distillation_loss": 0.06709885597229004, "epoch": 1.29, "learning_rate": 4.59571836312645e-05, "loss": 0.0675, "step": 1356, "task_loss": 0.07135514914989471 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7409344097921098, "compression_loss": 0.0, "distillation_loss": 0.1536739617586136, "epoch": 1.29, "learning_rate": 4.595137218225461e-05, "loss": 0.1479, "step": 1357, "task_loss": 0.09615078568458557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7409594765297332, "compression_loss": 0.0, "distillation_loss": 0.2070506066083908, "epoch": 1.29, "learning_rate": 4.594555692732706e-05, "loss": 0.2038, "step": 1358, "task_loss": 0.1743832379579544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.740984536174335, "compression_loss": 0.0, "distillation_loss": 0.24169021844863892, "epoch": 1.29, "learning_rate": 4.593973786753821e-05, "loss": 0.2442, "step": 1359, "task_loss": 0.2664361894130707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7410095887269194, "compression_loss": 0.0, "distillation_loss": 0.2801350951194763, "epoch": 1.29, "learning_rate": 4.593391500394514e-05, "loss": 0.2794, "step": 1360, "task_loss": 0.2726552486419678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7410346341884897, "compression_loss": 0.0, "distillation_loss": 0.09238822013139725, "epoch": 1.29, "learning_rate": 4.5928088337605586e-05, "loss": 0.0886, "step": 1361, "task_loss": 0.05469810217618942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7410596725600497, "compression_loss": 0.0, "distillation_loss": 0.09862814098596573, "epoch": 1.29, "learning_rate": 4.5922257869578e-05, "loss": 0.0927, "step": 1362, "task_loss": 0.0397910512983799 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.741084703842603, "compression_loss": 0.0, "distillation_loss": 0.18891645967960358, "epoch": 1.29, "learning_rate": 4.5916423600921496e-05, "loss": 0.1851, "step": 1363, "task_loss": 0.1504015326499939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7411097280371535, "compression_loss": 0.0, "distillation_loss": 0.34903091192245483, "epoch": 1.3, "learning_rate": 4.591058553269593e-05, "loss": 0.3357, "step": 1364, "task_loss": 0.21621909737586975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7411347451447046, "compression_loss": 0.0, "distillation_loss": 0.0994444340467453, "epoch": 1.3, "learning_rate": 4.590474366596178e-05, "loss": 0.1141, "step": 1365, "task_loss": 0.24601247906684875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7411597551662602, "compression_loss": 0.0, "distillation_loss": 0.11619491875171661, "epoch": 1.3, "learning_rate": 4.589889800178026e-05, "loss": 0.1112, "step": 1366, "task_loss": 0.06617462635040283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7411847581028239, "compression_loss": 0.0, "distillation_loss": 0.08728669583797455, "epoch": 1.3, "learning_rate": 4.589304854121329e-05, "loss": 0.0875, "step": 1367, "task_loss": 0.08968639373779297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7412097539553995, "compression_loss": 0.0, "distillation_loss": 0.13692383468151093, "epoch": 1.3, "learning_rate": 4.588719528532342e-05, "loss": 0.1391, "step": 1368, "task_loss": 0.15917925536632538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7412347427249903, "compression_loss": 0.0, "distillation_loss": 0.2873116433620453, "epoch": 1.3, "learning_rate": 4.588133823517392e-05, "loss": 0.2825, "step": 1369, "task_loss": 0.23926284909248352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7412597244126005, "compression_loss": 0.0, "distillation_loss": 0.22201889753341675, "epoch": 1.3, "learning_rate": 4.587547739182878e-05, "loss": 0.2193, "step": 1370, "task_loss": 0.19527406990528107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7412846990192334, "compression_loss": 0.0, "distillation_loss": 0.15326130390167236, "epoch": 1.3, "learning_rate": 4.586961275635263e-05, "loss": 0.1554, "step": 1371, "task_loss": 0.17474180459976196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7413096665458929, "compression_loss": 0.0, "distillation_loss": 0.1608862280845642, "epoch": 1.3, "learning_rate": 4.586374432981081e-05, "loss": 0.1622, "step": 1372, "task_loss": 0.17383195459842682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7413346269935825, "compression_loss": 0.0, "distillation_loss": 0.05019424855709076, "epoch": 1.3, "learning_rate": 4.585787211326935e-05, "loss": 0.0561, "step": 1373, "task_loss": 0.1092485561966896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7413595803633061, "compression_loss": 0.0, "distillation_loss": 0.45158857107162476, "epoch": 1.3, "learning_rate": 4.5851996107794975e-05, "loss": 0.4364, "step": 1374, "task_loss": 0.2999117970466614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7413845266560671, "compression_loss": 0.0, "distillation_loss": 0.128067284822464, "epoch": 1.31, "learning_rate": 4.584611631445508e-05, "loss": 0.1328, "step": 1375, "task_loss": 0.1756190061569214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7414094658728695, "compression_loss": 0.0, "distillation_loss": 0.1457686424255371, "epoch": 1.31, "learning_rate": 4.5840232734317754e-05, "loss": 0.1414, "step": 1376, "task_loss": 0.10216192901134491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7414343980147168, "compression_loss": 0.0, "distillation_loss": 0.33278051018714905, "epoch": 1.31, "learning_rate": 4.583434536845179e-05, "loss": 0.3246, "step": 1377, "task_loss": 0.2506526708602905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7414593230826126, "compression_loss": 0.0, "distillation_loss": 0.1251896768808365, "epoch": 1.31, "learning_rate": 4.5828454217926654e-05, "loss": 0.1325, "step": 1378, "task_loss": 0.19821739196777344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7414842410775608, "compression_loss": 0.0, "distillation_loss": 0.18593353033065796, "epoch": 1.31, "learning_rate": 4.5822559283812496e-05, "loss": 0.1785, "step": 1379, "task_loss": 0.11133649945259094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.741509152000565, "compression_loss": 0.0, "distillation_loss": 0.058180175721645355, "epoch": 1.31, "learning_rate": 4.581666056718016e-05, "loss": 0.0539, "step": 1380, "task_loss": 0.015172762796282768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7415340558526288, "compression_loss": 0.0, "distillation_loss": 0.2561149001121521, "epoch": 1.31, "learning_rate": 4.5810758069101175e-05, "loss": 0.2525, "step": 1381, "task_loss": 0.2195451259613037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7415589526347559, "compression_loss": 0.0, "distillation_loss": 0.3725208044052124, "epoch": 1.31, "learning_rate": 4.580485179064777e-05, "loss": 0.3704, "step": 1382, "task_loss": 0.350993275642395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.74158384234795, "compression_loss": 0.0, "distillation_loss": 0.03700024634599686, "epoch": 1.31, "learning_rate": 4.579894173289284e-05, "loss": 0.0392, "step": 1383, "task_loss": 0.0591222308576107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7416087249932148, "compression_loss": 0.0, "distillation_loss": 0.12477545440196991, "epoch": 1.31, "learning_rate": 4.579302789690997e-05, "loss": 0.1158, "step": 1384, "task_loss": 0.03471755236387253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7416336005715541, "compression_loss": 0.0, "distillation_loss": 0.1321507692337036, "epoch": 1.32, "learning_rate": 4.578711028377344e-05, "loss": 0.1349, "step": 1385, "task_loss": 0.1596865952014923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7416584690839714, "compression_loss": 0.0, "distillation_loss": 0.09508243948221207, "epoch": 1.32, "learning_rate": 4.578118889455821e-05, "loss": 0.0901, "step": 1386, "task_loss": 0.04528198391199112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7416833305314704, "compression_loss": 0.0, "distillation_loss": 0.09540075063705444, "epoch": 1.32, "learning_rate": 4.577526373033994e-05, "loss": 0.108, "step": 1387, "task_loss": 0.22137771546840668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.741708184915055, "compression_loss": 0.0, "distillation_loss": 0.14678335189819336, "epoch": 1.32, "learning_rate": 4.576933479219496e-05, "loss": 0.1414, "step": 1388, "task_loss": 0.0929180309176445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7417330322357285, "compression_loss": 0.0, "distillation_loss": 0.18357060849666595, "epoch": 1.32, "learning_rate": 4.5763402081200294e-05, "loss": 0.1835, "step": 1389, "task_loss": 0.18299201130867004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7417578724944949, "compression_loss": 0.0, "distillation_loss": 0.18099266290664673, "epoch": 1.32, "learning_rate": 4.575746559843364e-05, "loss": 0.176, "step": 1390, "task_loss": 0.13100393116474152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7417827056923577, "compression_loss": 0.0, "distillation_loss": 0.03351439908146858, "epoch": 1.32, "learning_rate": 4.5751525344973384e-05, "loss": 0.0415, "step": 1391, "task_loss": 0.11322241276502609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7418075318303208, "compression_loss": 0.0, "distillation_loss": 0.06732058525085449, "epoch": 1.32, "learning_rate": 4.5745581321898615e-05, "loss": 0.0621, "step": 1392, "task_loss": 0.015335185453295708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7418323509093876, "compression_loss": 0.0, "distillation_loss": 0.2339862734079361, "epoch": 1.32, "learning_rate": 4.5739633530289085e-05, "loss": 0.2269, "step": 1393, "task_loss": 0.16297489404678345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.741857162930562, "compression_loss": 0.0, "distillation_loss": 0.042287249118089676, "epoch": 1.32, "learning_rate": 4.573368197122524e-05, "loss": 0.0504, "step": 1394, "task_loss": 0.12305901199579239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7418819678948476, "compression_loss": 0.0, "distillation_loss": 0.20534148812294006, "epoch": 1.32, "learning_rate": 4.572772664578821e-05, "loss": 0.1949, "step": 1395, "task_loss": 0.10102957487106323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.741906765803248, "compression_loss": 0.0, "distillation_loss": 0.056711532175540924, "epoch": 1.33, "learning_rate": 4.572176755505981e-05, "loss": 0.0517, "step": 1396, "task_loss": 0.006524372845888138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.741931556656767, "compression_loss": 0.0, "distillation_loss": 0.04466833174228668, "epoch": 1.33, "learning_rate": 4.571580470012254e-05, "loss": 0.0415, "step": 1397, "task_loss": 0.013324148952960968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7419563404564083, "compression_loss": 0.0, "distillation_loss": 0.1416427046060562, "epoch": 1.33, "learning_rate": 4.5709838082059574e-05, "loss": 0.1331, "step": 1398, "task_loss": 0.05632413551211357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7419811172031756, "compression_loss": 0.0, "distillation_loss": 0.1042378842830658, "epoch": 1.33, "learning_rate": 4.570386770195478e-05, "loss": 0.0981, "step": 1399, "task_loss": 0.04313955828547478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7420058868980723, "compression_loss": 0.0, "distillation_loss": 0.1780770719051361, "epoch": 1.33, "learning_rate": 4.569789356089271e-05, "loss": 0.177, "step": 1400, "task_loss": 0.16733302175998688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7420306495421025, "compression_loss": 0.0, "distillation_loss": 0.258390873670578, "epoch": 1.33, "learning_rate": 4.569191565995859e-05, "loss": 0.2603, "step": 1401, "task_loss": 0.2771160304546356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7420554051362696, "compression_loss": 0.0, "distillation_loss": 0.3139894902706146, "epoch": 1.33, "learning_rate": 4.568593400023834e-05, "loss": 0.3055, "step": 1402, "task_loss": 0.22894920408725739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7420801536815774, "compression_loss": 0.0, "distillation_loss": 0.17985640466213226, "epoch": 1.33, "learning_rate": 4.567994858281855e-05, "loss": 0.1712, "step": 1403, "task_loss": 0.09336966276168823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7421048951790294, "compression_loss": 0.0, "distillation_loss": 0.19855880737304688, "epoch": 1.33, "learning_rate": 4.56739594087865e-05, "loss": 0.2139, "step": 1404, "task_loss": 0.35179561376571655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7421296296296296, "compression_loss": 0.0, "distillation_loss": 0.4204038977622986, "epoch": 1.33, "learning_rate": 4.566796647923017e-05, "loss": 0.4184, "step": 1405, "task_loss": 0.4005380868911743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7421543570343815, "compression_loss": 0.0, "distillation_loss": 0.18849390745162964, "epoch": 1.34, "learning_rate": 4.566196979523818e-05, "loss": 0.1992, "step": 1406, "task_loss": 0.2956838607788086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7421790773942887, "compression_loss": 0.0, "distillation_loss": 0.39693641662597656, "epoch": 1.34, "learning_rate": 4.5655969357899874e-05, "loss": 0.3956, "step": 1407, "task_loss": 0.38323667645454407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.742203790710355, "compression_loss": 0.0, "distillation_loss": 0.21428659558296204, "epoch": 1.34, "learning_rate": 4.564996516830525e-05, "loss": 0.2072, "step": 1408, "task_loss": 0.14346663653850555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7422284969835841, "compression_loss": 0.0, "distillation_loss": 0.15829120576381683, "epoch": 1.34, "learning_rate": 4.564395722754501e-05, "loss": 0.1601, "step": 1409, "task_loss": 0.17646542191505432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7422531962149795, "compression_loss": 0.0, "distillation_loss": 0.13824407756328583, "epoch": 1.34, "learning_rate": 4.56379455367105e-05, "loss": 0.129, "step": 1410, "task_loss": 0.04564966261386871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7422778884055451, "compression_loss": 0.0, "distillation_loss": 0.05707935988903046, "epoch": 1.34, "learning_rate": 4.563193009689381e-05, "loss": 0.0538, "step": 1411, "task_loss": 0.023959307000041008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7423025735562846, "compression_loss": 0.0, "distillation_loss": 0.42523133754730225, "epoch": 1.34, "learning_rate": 4.562591090918764e-05, "loss": 0.4085, "step": 1412, "task_loss": 0.2577897012233734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7423272516682015, "compression_loss": 0.0, "distillation_loss": 0.37662801146507263, "epoch": 1.34, "learning_rate": 4.561988797468542e-05, "loss": 0.3691, "step": 1413, "task_loss": 0.30150848627090454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7423519227422994, "compression_loss": 0.0, "distillation_loss": 0.21535269916057587, "epoch": 1.34, "learning_rate": 4.561386129448125e-05, "loss": 0.205, "step": 1414, "task_loss": 0.11162030696868896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7423765867795824, "compression_loss": 0.0, "distillation_loss": 0.14912311732769012, "epoch": 1.34, "learning_rate": 4.5607830869669885e-05, "loss": 0.1563, "step": 1415, "task_loss": 0.22072558104991913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7424012437810538, "compression_loss": 0.0, "distillation_loss": 0.1851930022239685, "epoch": 1.34, "learning_rate": 4.560179670134681e-05, "loss": 0.1752, "step": 1416, "task_loss": 0.08566315472126007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7424258937477174, "compression_loss": 0.0, "distillation_loss": 0.14120402932167053, "epoch": 1.35, "learning_rate": 4.559575879060813e-05, "loss": 0.1323, "step": 1417, "task_loss": 0.05225841701030731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7424505366805769, "compression_loss": 0.0, "distillation_loss": 0.29736214876174927, "epoch": 1.35, "learning_rate": 4.5589717138550685e-05, "loss": 0.2882, "step": 1418, "task_loss": 0.20604415237903595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.742475172580636, "compression_loss": 0.0, "distillation_loss": 0.08903981745243073, "epoch": 1.35, "learning_rate": 4.5583671746271964e-05, "loss": 0.0856, "step": 1419, "task_loss": 0.054539553821086884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7424998014488984, "compression_loss": 0.0, "distillation_loss": 0.0935673713684082, "epoch": 1.35, "learning_rate": 4.557762261487013e-05, "loss": 0.0914, "step": 1420, "task_loss": 0.07140633463859558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7425244232863677, "compression_loss": 0.0, "distillation_loss": 0.19990570843219757, "epoch": 1.35, "learning_rate": 4.557156974544404e-05, "loss": 0.1898, "step": 1421, "task_loss": 0.09906322509050369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7425490380940476, "compression_loss": 0.0, "distillation_loss": 0.24201074242591858, "epoch": 1.35, "learning_rate": 4.5565513139093244e-05, "loss": 0.2452, "step": 1422, "task_loss": 0.27348941564559937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7425736458729418, "compression_loss": 0.0, "distillation_loss": 0.2360510677099228, "epoch": 1.35, "learning_rate": 4.5559452796917936e-05, "loss": 0.2237, "step": 1423, "task_loss": 0.1127673014998436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.742598246624054, "compression_loss": 0.0, "distillation_loss": 0.15818031132221222, "epoch": 1.35, "learning_rate": 4.555338872001901e-05, "loss": 0.1643, "step": 1424, "task_loss": 0.2189611941576004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7426228403483879, "compression_loss": 0.0, "distillation_loss": 0.45690128207206726, "epoch": 1.35, "learning_rate": 4.554732090949805e-05, "loss": 0.4419, "step": 1425, "task_loss": 0.30732840299606323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7426474270469471, "compression_loss": 0.0, "distillation_loss": 0.15233120322227478, "epoch": 1.35, "learning_rate": 4.5541249366457276e-05, "loss": 0.1699, "step": 1426, "task_loss": 0.32829368114471436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7426720067207354, "compression_loss": 0.0, "distillation_loss": 0.24415679275989532, "epoch": 1.36, "learning_rate": 4.5535174091999636e-05, "loss": 0.2323, "step": 1427, "task_loss": 0.12578970193862915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7426965793707563, "compression_loss": 0.0, "distillation_loss": 0.10446126759052277, "epoch": 1.36, "learning_rate": 4.552909508722871e-05, "loss": 0.0981, "step": 1428, "task_loss": 0.040688008069992065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7427211449980137, "compression_loss": 0.0, "distillation_loss": 0.13874554634094238, "epoch": 1.36, "learning_rate": 4.55230123532488e-05, "loss": 0.1403, "step": 1429, "task_loss": 0.15391620993614197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7427457036035111, "compression_loss": 0.0, "distillation_loss": 0.04957931116223335, "epoch": 1.36, "learning_rate": 4.551692589116486e-05, "loss": 0.058, "step": 1430, "task_loss": 0.13414397835731506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7427702551882523, "compression_loss": 0.0, "distillation_loss": 0.18771857023239136, "epoch": 1.36, "learning_rate": 4.551083570208252e-05, "loss": 0.1799, "step": 1431, "task_loss": 0.10969773679971695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7427947997532409, "compression_loss": 0.0, "distillation_loss": 0.2879871129989624, "epoch": 1.36, "learning_rate": 4.550474178710809e-05, "loss": 0.2745, "step": 1432, "task_loss": 0.15289351344108582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7428193372994807, "compression_loss": 0.0, "distillation_loss": 0.0784875899553299, "epoch": 1.36, "learning_rate": 4.549864414734856e-05, "loss": 0.0731, "step": 1433, "task_loss": 0.02427070587873459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7428438678279753, "compression_loss": 0.0, "distillation_loss": 0.08174806833267212, "epoch": 1.36, "learning_rate": 4.54925427839116e-05, "loss": 0.0832, "step": 1434, "task_loss": 0.09647224843502045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7428683913397283, "compression_loss": 0.0, "distillation_loss": 0.1861201375722885, "epoch": 1.36, "learning_rate": 4.548643769790556e-05, "loss": 0.1766, "step": 1435, "task_loss": 0.09077153354883194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7428929078357436, "compression_loss": 0.0, "distillation_loss": 0.1611296832561493, "epoch": 1.36, "learning_rate": 4.548032889043944e-05, "loss": 0.169, "step": 1436, "task_loss": 0.2397400438785553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7429174173170245, "compression_loss": 0.0, "distillation_loss": 0.14555801451206207, "epoch": 1.36, "learning_rate": 4.547421636262294e-05, "loss": 0.1467, "step": 1437, "task_loss": 0.1570826917886734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7429419197845752, "compression_loss": 0.0, "distillation_loss": 0.23441661894321442, "epoch": 1.37, "learning_rate": 4.546810011556644e-05, "loss": 0.2309, "step": 1438, "task_loss": 0.19945275783538818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.742966415239399, "compression_loss": 0.0, "distillation_loss": 0.1089128702878952, "epoch": 1.37, "learning_rate": 4.546198015038097e-05, "loss": 0.1146, "step": 1439, "task_loss": 0.16534297168254852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7429909036824998, "compression_loss": 0.0, "distillation_loss": 0.2072802484035492, "epoch": 1.37, "learning_rate": 4.545585646817826e-05, "loss": 0.1989, "step": 1440, "task_loss": 0.12354776263237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7430153851148811, "compression_loss": 0.0, "distillation_loss": 0.3501192629337311, "epoch": 1.37, "learning_rate": 4.544972907007071e-05, "loss": 0.335, "step": 1441, "task_loss": 0.19924892485141754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7430398595375466, "compression_loss": 0.0, "distillation_loss": 0.45086967945098877, "epoch": 1.37, "learning_rate": 4.544359795717139e-05, "loss": 0.4323, "step": 1442, "task_loss": 0.265533447265625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7430643269515002, "compression_loss": 0.0, "distillation_loss": 0.12026548385620117, "epoch": 1.37, "learning_rate": 4.543746313059404e-05, "loss": 0.1146, "step": 1443, "task_loss": 0.063462033867836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7430887873577453, "compression_loss": 0.0, "distillation_loss": 0.17728132009506226, "epoch": 1.37, "learning_rate": 4.5431324591453094e-05, "loss": 0.1698, "step": 1444, "task_loss": 0.10240338742733002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7431132407572857, "compression_loss": 0.0, "distillation_loss": 0.3929315209388733, "epoch": 1.37, "learning_rate": 4.5425182340863626e-05, "loss": 0.3889, "step": 1445, "task_loss": 0.3523111641407013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7431376871511252, "compression_loss": 0.0, "distillation_loss": 0.14356978237628937, "epoch": 1.37, "learning_rate": 4.541903637994142e-05, "loss": 0.1433, "step": 1446, "task_loss": 0.14127568900585175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7431621265402673, "compression_loss": 0.0, "distillation_loss": 0.18327973783016205, "epoch": 1.37, "learning_rate": 4.541288670980291e-05, "loss": 0.171, "step": 1447, "task_loss": 0.060883235186338425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7431865589257157, "compression_loss": 0.0, "distillation_loss": 0.12613250315189362, "epoch": 1.38, "learning_rate": 4.540673333156523e-05, "loss": 0.1266, "step": 1448, "task_loss": 0.1305573284626007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7432109843084742, "compression_loss": 0.0, "distillation_loss": 0.11743993312120438, "epoch": 1.38, "learning_rate": 4.540057624634616e-05, "loss": 0.11, "step": 1449, "task_loss": 0.04330931603908539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7432354026895465, "compression_loss": 0.0, "distillation_loss": 0.16623345017433167, "epoch": 1.38, "learning_rate": 4.5394415455264164e-05, "loss": 0.1572, "step": 1450, "task_loss": 0.07557562738656998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.743259814069936, "compression_loss": 0.0, "distillation_loss": 0.20014873147010803, "epoch": 1.38, "learning_rate": 4.538825095943838e-05, "loss": 0.2043, "step": 1451, "task_loss": 0.24151219427585602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7432842184506467, "compression_loss": 0.0, "distillation_loss": 0.14636574685573578, "epoch": 1.38, "learning_rate": 4.538208275998861e-05, "loss": 0.1444, "step": 1452, "task_loss": 0.12678012251853943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7433086158326822, "compression_loss": 0.0, "distillation_loss": 0.12708376348018646, "epoch": 1.38, "learning_rate": 4.537591085803535e-05, "loss": 0.1272, "step": 1453, "task_loss": 0.1282857060432434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7433330062170461, "compression_loss": 0.0, "distillation_loss": 0.18036913871765137, "epoch": 1.38, "learning_rate": 4.5369735254699754e-05, "loss": 0.1817, "step": 1454, "task_loss": 0.19413068890571594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7433573896047422, "compression_loss": 0.0, "distillation_loss": 0.13496941328048706, "epoch": 1.38, "learning_rate": 4.536355595110365e-05, "loss": 0.1332, "step": 1455, "task_loss": 0.11723415553569794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.743381765996774, "compression_loss": 0.0, "distillation_loss": 0.19335393607616425, "epoch": 1.38, "learning_rate": 4.5357372948369534e-05, "loss": 0.1765, "step": 1456, "task_loss": 0.024988338351249695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7434061353941454, "compression_loss": 0.0, "distillation_loss": 0.19088754057884216, "epoch": 1.38, "learning_rate": 4.535118624762057e-05, "loss": 0.1783, "step": 1457, "task_loss": 0.06504514068365097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7434304977978599, "compression_loss": 0.0, "distillation_loss": 0.059609562158584595, "epoch": 1.38, "learning_rate": 4.534499584998062e-05, "loss": 0.0554, "step": 1458, "task_loss": 0.01706998609006405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7434548532089212, "compression_loss": 0.0, "distillation_loss": 0.14300726354122162, "epoch": 1.39, "learning_rate": 4.533880175657419e-05, "loss": 0.1359, "step": 1459, "task_loss": 0.07233646512031555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7434792016283331, "compression_loss": 0.0, "distillation_loss": 0.07946481555700302, "epoch": 1.39, "learning_rate": 4.533260396852646e-05, "loss": 0.0883, "step": 1460, "task_loss": 0.167746901512146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7435035430570993, "compression_loss": 0.0, "distillation_loss": 0.16639113426208496, "epoch": 1.39, "learning_rate": 4.532640248696331e-05, "loss": 0.1619, "step": 1461, "task_loss": 0.12190359830856323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7435278774962234, "compression_loss": 0.0, "distillation_loss": 0.1874895542860031, "epoch": 1.39, "learning_rate": 4.532019731301125e-05, "loss": 0.18, "step": 1462, "task_loss": 0.11228100210428238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.743552204946709, "compression_loss": 0.0, "distillation_loss": 0.18930573761463165, "epoch": 1.39, "learning_rate": 4.531398844779749e-05, "loss": 0.189, "step": 1463, "task_loss": 0.18576987087726593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7435765254095599, "compression_loss": 0.0, "distillation_loss": 0.17121566832065582, "epoch": 1.39, "learning_rate": 4.530777589244989e-05, "loss": 0.1633, "step": 1464, "task_loss": 0.09190039336681366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7436008388857798, "compression_loss": 0.0, "distillation_loss": 0.05851981043815613, "epoch": 1.39, "learning_rate": 4.5301559648096995e-05, "loss": 0.0584, "step": 1465, "task_loss": 0.05738149955868721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7436251453763723, "compression_loss": 0.0, "distillation_loss": 0.18863308429718018, "epoch": 1.39, "learning_rate": 4.529533971586802e-05, "loss": 0.1878, "step": 1466, "task_loss": 0.1802268773317337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7436494448823411, "compression_loss": 0.0, "distillation_loss": 0.07734081149101257, "epoch": 1.39, "learning_rate": 4.5289116096892834e-05, "loss": 0.0851, "step": 1467, "task_loss": 0.15495401620864868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7436737374046899, "compression_loss": 0.0, "distillation_loss": 0.07386526465415955, "epoch": 1.39, "learning_rate": 4.5282888792302e-05, "loss": 0.0687, "step": 1468, "task_loss": 0.022300483658909798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7436980229444223, "compression_loss": 0.0, "distillation_loss": 0.19114729762077332, "epoch": 1.4, "learning_rate": 4.527665780322674e-05, "loss": 0.2027, "step": 1469, "task_loss": 0.3064166009426117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7437223015025421, "compression_loss": 0.0, "distillation_loss": 0.3061857223510742, "epoch": 1.4, "learning_rate": 4.527042313079893e-05, "loss": 0.2977, "step": 1470, "task_loss": 0.22168110311031342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.743746573080053, "compression_loss": 0.0, "distillation_loss": 0.28121721744537354, "epoch": 1.4, "learning_rate": 4.526418477615114e-05, "loss": 0.2646, "step": 1471, "task_loss": 0.11492887139320374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7437708376779586, "compression_loss": 0.0, "distillation_loss": 0.12805329263210297, "epoch": 1.4, "learning_rate": 4.525794274041658e-05, "loss": 0.1237, "step": 1472, "task_loss": 0.08421643078327179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7437950952972626, "compression_loss": 0.0, "distillation_loss": 0.21808746457099915, "epoch": 1.4, "learning_rate": 4.5251697024729165e-05, "loss": 0.2269, "step": 1473, "task_loss": 0.30643704533576965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7438193459389687, "compression_loss": 0.0, "distillation_loss": 0.12836065888404846, "epoch": 1.4, "learning_rate": 4.524544763022346e-05, "loss": 0.1309, "step": 1474, "task_loss": 0.15339140594005585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7438435896040806, "compression_loss": 0.0, "distillation_loss": 0.25501298904418945, "epoch": 1.4, "learning_rate": 4.523919455803468e-05, "loss": 0.2459, "step": 1475, "task_loss": 0.1641104519367218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7438678262936019, "compression_loss": 0.0, "distillation_loss": 0.16674241423606873, "epoch": 1.4, "learning_rate": 4.5232937809298734e-05, "loss": 0.1652, "step": 1476, "task_loss": 0.1514662802219391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7438920560085364, "compression_loss": 0.0, "distillation_loss": 0.18036165833473206, "epoch": 1.4, "learning_rate": 4.5226677385152206e-05, "loss": 0.1784, "step": 1477, "task_loss": 0.1612229347229004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7439162787498876, "compression_loss": 0.0, "distillation_loss": 0.05593033879995346, "epoch": 1.4, "learning_rate": 4.522041328673231e-05, "loss": 0.0531, "step": 1478, "task_loss": 0.027741387486457825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7439404945186594, "compression_loss": 0.0, "distillation_loss": 0.07034408301115036, "epoch": 1.4, "learning_rate": 4.521414551517695e-05, "loss": 0.0644, "step": 1479, "task_loss": 0.011257486417889595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7439647033158554, "compression_loss": 0.0, "distillation_loss": 0.2267724722623825, "epoch": 1.41, "learning_rate": 4.520787407162471e-05, "loss": 0.2152, "step": 1480, "task_loss": 0.11058557778596878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7439889051424792, "compression_loss": 0.0, "distillation_loss": 0.07127425074577332, "epoch": 1.41, "learning_rate": 4.520159895721483e-05, "loss": 0.0828, "step": 1481, "task_loss": 0.18699173629283905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7440130999995346, "compression_loss": 0.0, "distillation_loss": 0.1930515021085739, "epoch": 1.41, "learning_rate": 4.51953201730872e-05, "loss": 0.1903, "step": 1482, "task_loss": 0.1656961441040039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7440372878880251, "compression_loss": 0.0, "distillation_loss": 0.15473680198192596, "epoch": 1.41, "learning_rate": 4.51890377203824e-05, "loss": 0.145, "step": 1483, "task_loss": 0.05734732747077942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7440614688089547, "compression_loss": 0.0, "distillation_loss": 0.22360102832317352, "epoch": 1.41, "learning_rate": 4.518275160024167e-05, "loss": 0.2159, "step": 1484, "task_loss": 0.14627224206924438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7440856427633268, "compression_loss": 0.0, "distillation_loss": 0.17431600391864777, "epoch": 1.41, "learning_rate": 4.5176461813806904e-05, "loss": 0.1683, "step": 1485, "task_loss": 0.1145024299621582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7441098097521451, "compression_loss": 0.0, "distillation_loss": 0.14846046268939972, "epoch": 1.41, "learning_rate": 4.5170168362220686e-05, "loss": 0.1464, "step": 1486, "task_loss": 0.12777863442897797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7441339697764136, "compression_loss": 0.0, "distillation_loss": 0.11251084506511688, "epoch": 1.41, "learning_rate": 4.516387124662624e-05, "loss": 0.1045, "step": 1487, "task_loss": 0.032140232622623444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7441581228371354, "compression_loss": 0.0, "distillation_loss": 0.29036539793014526, "epoch": 1.41, "learning_rate": 4.5157570468167464e-05, "loss": 0.2763, "step": 1488, "task_loss": 0.14925380051136017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7441822689353148, "compression_loss": 0.0, "distillation_loss": 0.06524545699357986, "epoch": 1.41, "learning_rate": 4.5151266027988946e-05, "loss": 0.0624, "step": 1489, "task_loss": 0.03709521144628525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.744206408071955, "compression_loss": 0.0, "distillation_loss": 0.12188740074634552, "epoch": 1.42, "learning_rate": 4.51449579272359e-05, "loss": 0.1168, "step": 1490, "task_loss": 0.07146313041448593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.74423054024806, "compression_loss": 0.0, "distillation_loss": 0.035563819110393524, "epoch": 1.42, "learning_rate": 4.5138646167054224e-05, "loss": 0.0455, "step": 1491, "task_loss": 0.1353863775730133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7442546654646334, "compression_loss": 0.0, "distillation_loss": 0.18486768007278442, "epoch": 1.42, "learning_rate": 4.513233074859049e-05, "loss": 0.1757, "step": 1492, "task_loss": 0.09314113855361938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7442787837226787, "compression_loss": 0.0, "distillation_loss": 0.11438624560832977, "epoch": 1.42, "learning_rate": 4.512601167299191e-05, "loss": 0.1168, "step": 1493, "task_loss": 0.13881582021713257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7443028950231999, "compression_loss": 0.0, "distillation_loss": 0.03467051684856415, "epoch": 1.42, "learning_rate": 4.511968894140639e-05, "loss": 0.0348, "step": 1494, "task_loss": 0.0362866148352623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7443269993672005, "compression_loss": 0.0, "distillation_loss": 0.15380631387233734, "epoch": 1.42, "learning_rate": 4.511336255498247e-05, "loss": 0.1452, "step": 1495, "task_loss": 0.06820768862962723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7443510967556841, "compression_loss": 0.0, "distillation_loss": 0.057968005537986755, "epoch": 1.42, "learning_rate": 4.510703251486937e-05, "loss": 0.0636, "step": 1496, "task_loss": 0.11408950388431549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7443751871896545, "compression_loss": 0.0, "distillation_loss": 0.1298018991947174, "epoch": 1.42, "learning_rate": 4.5100698822216984e-05, "loss": 0.1205, "step": 1497, "task_loss": 0.03705129399895668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7443992706701154, "compression_loss": 0.0, "distillation_loss": 0.056667059659957886, "epoch": 1.42, "learning_rate": 4.509436147817585e-05, "loss": 0.0534, "step": 1498, "task_loss": 0.02439264766871929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7444233471980704, "compression_loss": 0.0, "distillation_loss": 0.123785100877285, "epoch": 1.42, "learning_rate": 4.5088020483897184e-05, "loss": 0.1167, "step": 1499, "task_loss": 0.05303073301911354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7444474167745232, "compression_loss": 0.0, "distillation_loss": 0.02330487221479416, "epoch": 1.42, "learning_rate": 4.508167584053285e-05, "loss": 0.0425, "step": 1500, "task_loss": 0.21552759408950806 }, { "epoch": 1.42, "eval_accuracy": 0.911697247706422, "eval_loss": 0.38651296496391296, "eval_runtime": 18.7646, "eval_samples_per_second": 46.471, "eval_steps_per_second": 5.809, "step": 1500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7444714794004775, "compression_loss": 0.0, "distillation_loss": 0.11092271655797958, "epoch": 1.43, "learning_rate": 4.507532754923537e-05, "loss": 0.1073, "step": 1501, "task_loss": 0.07452307641506195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7444955350769371, "compression_loss": 0.0, "distillation_loss": 0.06650504469871521, "epoch": 1.43, "learning_rate": 4.506897561115797e-05, "loss": 0.0621, "step": 1502, "task_loss": 0.02240423485636711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7445195838049055, "compression_loss": 0.0, "distillation_loss": 0.16672199964523315, "epoch": 1.43, "learning_rate": 4.506262002745449e-05, "loss": 0.1695, "step": 1503, "task_loss": 0.194443479180336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7445436255853864, "compression_loss": 0.0, "distillation_loss": 0.07140068709850311, "epoch": 1.43, "learning_rate": 4.505626079927947e-05, "loss": 0.0655, "step": 1504, "task_loss": 0.012316873297095299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7445676604193836, "compression_loss": 0.0, "distillation_loss": 0.21132929623126984, "epoch": 1.43, "learning_rate": 4.504989792778808e-05, "loss": 0.2003, "step": 1505, "task_loss": 0.10102443397045135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7445916883079007, "compression_loss": 0.0, "distillation_loss": 0.1785595715045929, "epoch": 1.43, "learning_rate": 4.504353141413616e-05, "loss": 0.1787, "step": 1506, "task_loss": 0.18019554018974304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7446157092519414, "compression_loss": 0.0, "distillation_loss": 0.25665420293807983, "epoch": 1.43, "learning_rate": 4.5037161259480246e-05, "loss": 0.2455, "step": 1507, "task_loss": 0.14560416340827942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7446397232525094, "compression_loss": 0.0, "distillation_loss": 0.04967069625854492, "epoch": 1.43, "learning_rate": 4.5030787464977476e-05, "loss": 0.0577, "step": 1508, "task_loss": 0.13003495335578918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7446637303106083, "compression_loss": 0.0, "distillation_loss": 0.17093107104301453, "epoch": 1.43, "learning_rate": 4.50244100317857e-05, "loss": 0.171, "step": 1509, "task_loss": 0.17199328541755676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7446877304272419, "compression_loss": 0.0, "distillation_loss": 0.06911082565784454, "epoch": 1.43, "learning_rate": 4.5018028961063394e-05, "loss": 0.0759, "step": 1510, "task_loss": 0.13733558356761932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7447117236034139, "compression_loss": 0.0, "distillation_loss": 0.21413826942443848, "epoch": 1.43, "learning_rate": 4.501164425396973e-05, "loss": 0.2076, "step": 1511, "task_loss": 0.1490751951932907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7447357098401277, "compression_loss": 0.0, "distillation_loss": 0.1935446858406067, "epoch": 1.44, "learning_rate": 4.5005255911664507e-05, "loss": 0.1988, "step": 1512, "task_loss": 0.2461317479610443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7447596891383874, "compression_loss": 0.0, "distillation_loss": 0.1821810007095337, "epoch": 1.44, "learning_rate": 4.49988639353082e-05, "loss": 0.1794, "step": 1513, "task_loss": 0.15411117672920227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7447836614991964, "compression_loss": 0.0, "distillation_loss": 0.16811317205429077, "epoch": 1.44, "learning_rate": 4.4992468326061944e-05, "loss": 0.1679, "step": 1514, "task_loss": 0.16646404564380646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7448076269235584, "compression_loss": 0.0, "distillation_loss": 0.13295818865299225, "epoch": 1.44, "learning_rate": 4.498606908508754e-05, "loss": 0.1242, "step": 1515, "task_loss": 0.045377686619758606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7448315854124772, "compression_loss": 0.0, "distillation_loss": 0.16021104156970978, "epoch": 1.44, "learning_rate": 4.4979666213547414e-05, "loss": 0.1632, "step": 1516, "task_loss": 0.19016344845294952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7448555369669564, "compression_loss": 0.0, "distillation_loss": 0.1837073564529419, "epoch": 1.44, "learning_rate": 4.497325971260471e-05, "loss": 0.1772, "step": 1517, "task_loss": 0.11903562396764755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7448794815879997, "compression_loss": 0.0, "distillation_loss": 0.08035746216773987, "epoch": 1.44, "learning_rate": 4.496684958342319e-05, "loss": 0.076, "step": 1518, "task_loss": 0.03652361035346985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7449034192766107, "compression_loss": 0.0, "distillation_loss": 0.04287096485495567, "epoch": 1.44, "learning_rate": 4.4960435827167266e-05, "loss": 0.0395, "step": 1519, "task_loss": 0.009310789406299591 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7449273500337933, "compression_loss": 0.0, "distillation_loss": 0.04646936431527138, "epoch": 1.44, "learning_rate": 4.495401844500205e-05, "loss": 0.0439, "step": 1520, "task_loss": 0.02119472809135914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.744951273860551, "compression_loss": 0.0, "distillation_loss": 0.20926059782505035, "epoch": 1.44, "learning_rate": 4.494759743809329e-05, "loss": 0.2045, "step": 1521, "task_loss": 0.16188855469226837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7449751907578875, "compression_loss": 0.0, "distillation_loss": 0.11345314979553223, "epoch": 1.45, "learning_rate": 4.494117280760739e-05, "loss": 0.1181, "step": 1522, "task_loss": 0.16004882752895355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7449991007268065, "compression_loss": 0.0, "distillation_loss": 0.16868895292282104, "epoch": 1.45, "learning_rate": 4.49347445547114e-05, "loss": 0.1641, "step": 1523, "task_loss": 0.12286947667598724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7450230037683118, "compression_loss": 0.0, "distillation_loss": 0.17652767896652222, "epoch": 1.45, "learning_rate": 4.4928312680573064e-05, "loss": 0.168, "step": 1524, "task_loss": 0.09076812118291855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7450468998834069, "compression_loss": 0.0, "distillation_loss": 0.05617145821452141, "epoch": 1.45, "learning_rate": 4.492187718636075e-05, "loss": 0.0604, "step": 1525, "task_loss": 0.09820869565010071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7450707890730955, "compression_loss": 0.0, "distillation_loss": 0.216825470328331, "epoch": 1.45, "learning_rate": 4.49154380732435e-05, "loss": 0.2122, "step": 1526, "task_loss": 0.1709357053041458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7450946713383815, "compression_loss": 0.0, "distillation_loss": 0.4693998694419861, "epoch": 1.45, "learning_rate": 4.490899534239101e-05, "loss": 0.4483, "step": 1527, "task_loss": 0.2588621973991394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7451185466802683, "compression_loss": 0.0, "distillation_loss": 0.2971939742565155, "epoch": 1.45, "learning_rate": 4.490254899497364e-05, "loss": 0.2901, "step": 1528, "task_loss": 0.22666522860527039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7451424150997598, "compression_loss": 0.0, "distillation_loss": 0.25744807720184326, "epoch": 1.45, "learning_rate": 4.4896099032162386e-05, "loss": 0.2487, "step": 1529, "task_loss": 0.17000789940357208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7451662765978594, "compression_loss": 0.0, "distillation_loss": 0.13210158050060272, "epoch": 1.45, "learning_rate": 4.488964545512892e-05, "loss": 0.1324, "step": 1530, "task_loss": 0.13510762155056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7451901311755711, "compression_loss": 0.0, "distillation_loss": 0.2513887882232666, "epoch": 1.45, "learning_rate": 4.488318826504557e-05, "loss": 0.2502, "step": 1531, "task_loss": 0.23999468982219696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7452139788338985, "compression_loss": 0.0, "distillation_loss": 0.2592649757862091, "epoch": 1.45, "learning_rate": 4.4876727463085324e-05, "loss": 0.2465, "step": 1532, "task_loss": 0.13125677406787872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7452378195738452, "compression_loss": 0.0, "distillation_loss": 0.26413312554359436, "epoch": 1.46, "learning_rate": 4.487026305042179e-05, "loss": 0.2514, "step": 1533, "task_loss": 0.13711029291152954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7452616533964148, "compression_loss": 0.0, "distillation_loss": 0.3824172019958496, "epoch": 1.46, "learning_rate": 4.4863795028229286e-05, "loss": 0.3723, "step": 1534, "task_loss": 0.281066358089447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7452854803026112, "compression_loss": 0.0, "distillation_loss": 0.22528879344463348, "epoch": 1.46, "learning_rate": 4.4857323397682746e-05, "loss": 0.2242, "step": 1535, "task_loss": 0.21392083168029785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.745309300293438, "compression_loss": 0.0, "distillation_loss": 0.12722215056419373, "epoch": 1.46, "learning_rate": 4.485084815995778e-05, "loss": 0.1359, "step": 1536, "task_loss": 0.2140396535396576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7453331133698988, "compression_loss": 0.0, "distillation_loss": 0.14468204975128174, "epoch": 1.46, "learning_rate": 4.484436931623064e-05, "loss": 0.1436, "step": 1537, "task_loss": 0.1337466537952423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7453569195329973, "compression_loss": 0.0, "distillation_loss": 0.41913074254989624, "epoch": 1.46, "learning_rate": 4.4837886867678245e-05, "loss": 0.4061, "step": 1538, "task_loss": 0.28871697187423706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7453807187837374, "compression_loss": 0.0, "distillation_loss": 0.12353764474391937, "epoch": 1.46, "learning_rate": 4.4831400815478164e-05, "loss": 0.117, "step": 1539, "task_loss": 0.05776692181825638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7454045111231224, "compression_loss": 0.0, "distillation_loss": 0.07819847762584686, "epoch": 1.46, "learning_rate": 4.482491116080861e-05, "loss": 0.0901, "step": 1540, "task_loss": 0.19672957062721252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7454282965521564, "compression_loss": 0.0, "distillation_loss": 0.3406957983970642, "epoch": 1.46, "learning_rate": 4.4818417904848466e-05, "loss": 0.3262, "step": 1541, "task_loss": 0.19611957669258118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7454520750718426, "compression_loss": 0.0, "distillation_loss": 0.1422961801290512, "epoch": 1.46, "learning_rate": 4.481192104877726e-05, "loss": 0.1342, "step": 1542, "task_loss": 0.060906361788511276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7454758466831852, "compression_loss": 0.0, "distillation_loss": 0.05376805365085602, "epoch": 1.47, "learning_rate": 4.480542059377519e-05, "loss": 0.0542, "step": 1543, "task_loss": 0.05844269320368767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7454996113871876, "compression_loss": 0.0, "distillation_loss": 0.20722997188568115, "epoch": 1.47, "learning_rate": 4.479891654102307e-05, "loss": 0.2045, "step": 1544, "task_loss": 0.1803072690963745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7455233691848534, "compression_loss": 0.0, "distillation_loss": 0.2142692506313324, "epoch": 1.47, "learning_rate": 4.4792408891702426e-05, "loss": 0.2035, "step": 1545, "task_loss": 0.10616156458854675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7455471200771865, "compression_loss": 0.0, "distillation_loss": 0.14973235130310059, "epoch": 1.47, "learning_rate": 4.4785897646995376e-05, "loss": 0.1475, "step": 1546, "task_loss": 0.12703794240951538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7455708640651905, "compression_loss": 0.0, "distillation_loss": 0.18545937538146973, "epoch": 1.47, "learning_rate": 4.477938280808473e-05, "loss": 0.184, "step": 1547, "task_loss": 0.17124545574188232 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.745594601149869, "compression_loss": 0.0, "distillation_loss": 0.1463180035352707, "epoch": 1.47, "learning_rate": 4.4772864376153936e-05, "loss": 0.1379, "step": 1548, "task_loss": 0.06201765686273575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7456183313322257, "compression_loss": 0.0, "distillation_loss": 0.1488766223192215, "epoch": 1.47, "learning_rate": 4.4766342352387106e-05, "loss": 0.1424, "step": 1549, "task_loss": 0.08362394571304321 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7456420546132644, "compression_loss": 0.0, "distillation_loss": 0.09970419108867645, "epoch": 1.47, "learning_rate": 4.475981673796899e-05, "loss": 0.1015, "step": 1550, "task_loss": 0.11792711168527603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7456657709939887, "compression_loss": 0.0, "distillation_loss": 0.17068204283714294, "epoch": 1.47, "learning_rate": 4.475328753408499e-05, "loss": 0.1719, "step": 1551, "task_loss": 0.18303748965263367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7456894804754023, "compression_loss": 0.0, "distillation_loss": 0.05580803379416466, "epoch": 1.47, "learning_rate": 4.474675474192119e-05, "loss": 0.0535, "step": 1552, "task_loss": 0.03291773051023483 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7457131830585089, "compression_loss": 0.0, "distillation_loss": 0.033489570021629333, "epoch": 1.47, "learning_rate": 4.4740218362664276e-05, "loss": 0.0308, "step": 1553, "task_loss": 0.006132926791906357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.745736878744312, "compression_loss": 0.0, "distillation_loss": 0.0999932736158371, "epoch": 1.48, "learning_rate": 4.473367839750165e-05, "loss": 0.1088, "step": 1554, "task_loss": 0.18756511807441711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7457605675338156, "compression_loss": 0.0, "distillation_loss": 0.1821468025445938, "epoch": 1.48, "learning_rate": 4.4727134847621276e-05, "loss": 0.174, "step": 1555, "task_loss": 0.1006765216588974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7457842494280232, "compression_loss": 0.0, "distillation_loss": 0.15182924270629883, "epoch": 1.48, "learning_rate": 4.4720587714211863e-05, "loss": 0.1564, "step": 1556, "task_loss": 0.1978447288274765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7458079244279385, "compression_loss": 0.0, "distillation_loss": 0.28013527393341064, "epoch": 1.48, "learning_rate": 4.471403699846272e-05, "loss": 0.2684, "step": 1557, "task_loss": 0.16278645396232605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7458315925345651, "compression_loss": 0.0, "distillation_loss": 0.023632332682609558, "epoch": 1.48, "learning_rate": 4.470748270156381e-05, "loss": 0.0296, "step": 1558, "task_loss": 0.08337298780679703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7458552537489069, "compression_loss": 0.0, "distillation_loss": 0.07167325913906097, "epoch": 1.48, "learning_rate": 4.4700924824705745e-05, "loss": 0.0692, "step": 1559, "task_loss": 0.047288812696933746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7458789080719673, "compression_loss": 0.0, "distillation_loss": 0.053809523582458496, "epoch": 1.48, "learning_rate": 4.469436336907982e-05, "loss": 0.0576, "step": 1560, "task_loss": 0.09203386306762695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7459025555047503, "compression_loss": 0.0, "distillation_loss": 0.08844821155071259, "epoch": 1.48, "learning_rate": 4.4687798335877936e-05, "loss": 0.088, "step": 1561, "task_loss": 0.08441342413425446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7459261960482593, "compression_loss": 0.0, "distillation_loss": 0.053808607161045074, "epoch": 1.48, "learning_rate": 4.4681229726292664e-05, "loss": 0.0586, "step": 1562, "task_loss": 0.1021692305803299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7459498297034981, "compression_loss": 0.0, "distillation_loss": 0.24225428700447083, "epoch": 1.48, "learning_rate": 4.4674657541517227e-05, "loss": 0.2405, "step": 1563, "task_loss": 0.22495496273040771 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7459734564714704, "compression_loss": 0.0, "distillation_loss": 0.1985190361738205, "epoch": 1.49, "learning_rate": 4.466808178274549e-05, "loss": 0.1868, "step": 1564, "task_loss": 0.08100724220275879 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7459970763531799, "compression_loss": 0.0, "distillation_loss": 0.2816087603569031, "epoch": 1.49, "learning_rate": 4.4661502451171975e-05, "loss": 0.2677, "step": 1565, "task_loss": 0.14259889721870422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7460206893496302, "compression_loss": 0.0, "distillation_loss": 0.16295886039733887, "epoch": 1.49, "learning_rate": 4.465491954799186e-05, "loss": 0.1618, "step": 1566, "task_loss": 0.1518009454011917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.746044295461825, "compression_loss": 0.0, "distillation_loss": 0.17871344089508057, "epoch": 1.49, "learning_rate": 4.4648333074400936e-05, "loss": 0.171, "step": 1567, "task_loss": 0.10131989419460297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7460678946907681, "compression_loss": 0.0, "distillation_loss": 0.10870079696178436, "epoch": 1.49, "learning_rate": 4.464174303159569e-05, "loss": 0.1071, "step": 1568, "task_loss": 0.09272871166467667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.746091487037463, "compression_loss": 0.0, "distillation_loss": 0.11727038025856018, "epoch": 1.49, "learning_rate": 4.463514942077323e-05, "loss": 0.1156, "step": 1569, "task_loss": 0.10054733604192734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7461150725029135, "compression_loss": 0.0, "distillation_loss": 0.03882730007171631, "epoch": 1.49, "learning_rate": 4.4628552243131304e-05, "loss": 0.0442, "step": 1570, "task_loss": 0.09213192760944366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7461386510881233, "compression_loss": 0.0, "distillation_loss": 0.09602994471788406, "epoch": 1.49, "learning_rate": 4.462195149986833e-05, "loss": 0.09, "step": 1571, "task_loss": 0.03555634990334511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7461622227940959, "compression_loss": 0.0, "distillation_loss": 0.2854123115539551, "epoch": 1.49, "learning_rate": 4.4615347192183375e-05, "loss": 0.2702, "step": 1572, "task_loss": 0.13358475267887115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7461857876218352, "compression_loss": 0.0, "distillation_loss": 0.08520927280187607, "epoch": 1.49, "learning_rate": 4.4608739321276126e-05, "loss": 0.0797, "step": 1573, "task_loss": 0.03009282425045967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7462093455723449, "compression_loss": 0.0, "distillation_loss": 0.074330173432827, "epoch": 1.49, "learning_rate": 4.4602127888346944e-05, "loss": 0.0698, "step": 1574, "task_loss": 0.028723739087581635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7462328966466284, "compression_loss": 0.0, "distillation_loss": 0.06121968850493431, "epoch": 1.5, "learning_rate": 4.459551289459684e-05, "loss": 0.0707, "step": 1575, "task_loss": 0.15602229535579681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7462564408456897, "compression_loss": 0.0, "distillation_loss": 0.14412710070610046, "epoch": 1.5, "learning_rate": 4.4588894341227426e-05, "loss": 0.1395, "step": 1576, "task_loss": 0.09811600297689438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7462799781705323, "compression_loss": 0.0, "distillation_loss": 0.2897692918777466, "epoch": 1.5, "learning_rate": 4.4582272229441024e-05, "loss": 0.2797, "step": 1577, "task_loss": 0.1895175725221634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7463035086221598, "compression_loss": 0.0, "distillation_loss": 0.06360229849815369, "epoch": 1.5, "learning_rate": 4.457564656044056e-05, "loss": 0.0622, "step": 1578, "task_loss": 0.04995894804596901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7463270322015761, "compression_loss": 0.0, "distillation_loss": 0.0988512635231018, "epoch": 1.5, "learning_rate": 4.456901733542962e-05, "loss": 0.1002, "step": 1579, "task_loss": 0.11196555197238922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7463505489097848, "compression_loss": 0.0, "distillation_loss": 0.11553806811571121, "epoch": 1.5, "learning_rate": 4.4562384555612436e-05, "loss": 0.109, "step": 1580, "task_loss": 0.050435107201337814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7463740587477896, "compression_loss": 0.0, "distillation_loss": 0.4326043128967285, "epoch": 1.5, "learning_rate": 4.455574822219388e-05, "loss": 0.4232, "step": 1581, "task_loss": 0.3386293053627014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7463975617165942, "compression_loss": 0.0, "distillation_loss": 0.2798699736595154, "epoch": 1.5, "learning_rate": 4.454910833637949e-05, "loss": 0.2735, "step": 1582, "task_loss": 0.21614964306354523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7464210578172021, "compression_loss": 0.0, "distillation_loss": 0.1274246722459793, "epoch": 1.5, "learning_rate": 4.454246489937541e-05, "loss": 0.1217, "step": 1583, "task_loss": 0.07030543684959412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7464445470506172, "compression_loss": 0.0, "distillation_loss": 0.07745787501335144, "epoch": 1.5, "learning_rate": 4.4535817912388466e-05, "loss": 0.081, "step": 1584, "task_loss": 0.11285798251628876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7464680294178432, "compression_loss": 0.0, "distillation_loss": 0.2271667718887329, "epoch": 1.51, "learning_rate": 4.4529167376626116e-05, "loss": 0.2175, "step": 1585, "task_loss": 0.13050048053264618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7464915049198835, "compression_loss": 0.0, "distillation_loss": 0.3326779007911682, "epoch": 1.51, "learning_rate": 4.4522513293296456e-05, "loss": 0.3292, "step": 1586, "task_loss": 0.2977886497974396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7465149735577421, "compression_loss": 0.0, "distillation_loss": 0.04014962911605835, "epoch": 1.51, "learning_rate": 4.451585566360823e-05, "loss": 0.052, "step": 1587, "task_loss": 0.15873615443706512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7465384353324225, "compression_loss": 0.0, "distillation_loss": 0.154592826962471, "epoch": 1.51, "learning_rate": 4.450919448877084e-05, "loss": 0.1459, "step": 1588, "task_loss": 0.06723834574222565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7465618902449284, "compression_loss": 0.0, "distillation_loss": 0.05010029301047325, "epoch": 1.51, "learning_rate": 4.4502529769994314e-05, "loss": 0.0464, "step": 1589, "task_loss": 0.012682372704148293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7465853382962635, "compression_loss": 0.0, "distillation_loss": 0.08115085959434509, "epoch": 1.51, "learning_rate": 4.449586150848934e-05, "loss": 0.0893, "step": 1590, "task_loss": 0.1630755513906479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7466087794874315, "compression_loss": 0.0, "distillation_loss": 0.21416254341602325, "epoch": 1.51, "learning_rate": 4.44891897054672e-05, "loss": 0.2044, "step": 1591, "task_loss": 0.11678683757781982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7466322138194361, "compression_loss": 0.0, "distillation_loss": 0.17701666057109833, "epoch": 1.51, "learning_rate": 4.4482514362139915e-05, "loss": 0.1665, "step": 1592, "task_loss": 0.07226403057575226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.746655641293281, "compression_loss": 0.0, "distillation_loss": 0.09402104467153549, "epoch": 1.51, "learning_rate": 4.4475835479720065e-05, "loss": 0.0962, "step": 1593, "task_loss": 0.11615651845932007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7466790619099698, "compression_loss": 0.0, "distillation_loss": 0.3029029071331024, "epoch": 1.51, "learning_rate": 4.4469153059420895e-05, "loss": 0.2929, "step": 1594, "task_loss": 0.20262876152992249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7467024756705063, "compression_loss": 0.0, "distillation_loss": 0.17734010517597198, "epoch": 1.51, "learning_rate": 4.4462467102456305e-05, "loss": 0.1858, "step": 1595, "task_loss": 0.26218533515930176 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.746725882575894, "compression_loss": 0.0, "distillation_loss": 0.24488165974617004, "epoch": 1.52, "learning_rate": 4.4455777610040846e-05, "loss": 0.252, "step": 1596, "task_loss": 0.3160625696182251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7467492826271367, "compression_loss": 0.0, "distillation_loss": 0.3032427430152893, "epoch": 1.52, "learning_rate": 4.444908458338968e-05, "loss": 0.2889, "step": 1597, "task_loss": 0.16029399633407593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7467726758252381, "compression_loss": 0.0, "distillation_loss": 0.22814524173736572, "epoch": 1.52, "learning_rate": 4.4442388023718624e-05, "loss": 0.231, "step": 1598, "task_loss": 0.25681257247924805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7467960621712019, "compression_loss": 0.0, "distillation_loss": 0.2185569703578949, "epoch": 1.52, "learning_rate": 4.443568793224415e-05, "loss": 0.221, "step": 1599, "task_loss": 0.24304306507110596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7468194416660316, "compression_loss": 0.0, "distillation_loss": 0.1327572613954544, "epoch": 1.52, "learning_rate": 4.4428984310183364e-05, "loss": 0.1352, "step": 1600, "task_loss": 0.15751878917217255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7468428143107311, "compression_loss": 0.0, "distillation_loss": 0.2773556709289551, "epoch": 1.52, "learning_rate": 4.4422277158754005e-05, "loss": 0.2637, "step": 1601, "task_loss": 0.14039871096611023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.746866180106304, "compression_loss": 0.0, "distillation_loss": 0.2993885278701782, "epoch": 1.52, "learning_rate": 4.441556647917446e-05, "loss": 0.2958, "step": 1602, "task_loss": 0.26360023021698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.746889539053754, "compression_loss": 0.0, "distillation_loss": 0.14780689775943756, "epoch": 1.52, "learning_rate": 4.440885227266376e-05, "loss": 0.1404, "step": 1603, "task_loss": 0.07409326732158661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7469128911540847, "compression_loss": 0.0, "distillation_loss": 0.1367529034614563, "epoch": 1.52, "learning_rate": 4.440213454044158e-05, "loss": 0.1366, "step": 1604, "task_loss": 0.13533815741539001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7469362364082999, "compression_loss": 0.0, "distillation_loss": 0.11874578893184662, "epoch": 1.52, "learning_rate": 4.43954132837282e-05, "loss": 0.12, "step": 1605, "task_loss": 0.1314026564359665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7469595748174032, "compression_loss": 0.0, "distillation_loss": 0.09664209187030792, "epoch": 1.53, "learning_rate": 4.43886885037446e-05, "loss": 0.0971, "step": 1606, "task_loss": 0.10162815451622009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7469829063823983, "compression_loss": 0.0, "distillation_loss": 0.08660341799259186, "epoch": 1.53, "learning_rate": 4.438196020171235e-05, "loss": 0.0801, "step": 1607, "task_loss": 0.021267052739858627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7470062311042888, "compression_loss": 0.0, "distillation_loss": 0.08157442510128021, "epoch": 1.53, "learning_rate": 4.437522837885369e-05, "loss": 0.0841, "step": 1608, "task_loss": 0.10718289017677307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7470295489840786, "compression_loss": 0.0, "distillation_loss": 0.13874168694019318, "epoch": 1.53, "learning_rate": 4.436849303639148e-05, "loss": 0.1323, "step": 1609, "task_loss": 0.07445013523101807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7470528600227712, "compression_loss": 0.0, "distillation_loss": 0.18086206912994385, "epoch": 1.53, "learning_rate": 4.436175417554923e-05, "loss": 0.1798, "step": 1610, "task_loss": 0.16998088359832764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7470761642213704, "compression_loss": 0.0, "distillation_loss": 0.11296135932207108, "epoch": 1.53, "learning_rate": 4.4355011797551086e-05, "loss": 0.1093, "step": 1611, "task_loss": 0.07632420212030411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7470994615808797, "compression_loss": 0.0, "distillation_loss": 0.10066385567188263, "epoch": 1.53, "learning_rate": 4.4348265903621844e-05, "loss": 0.0935, "step": 1612, "task_loss": 0.028924619778990746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.747122752102303, "compression_loss": 0.0, "distillation_loss": 0.0938473492860794, "epoch": 1.53, "learning_rate": 4.4341516494986904e-05, "loss": 0.107, "step": 1613, "task_loss": 0.22503921389579773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7471460357866438, "compression_loss": 0.0, "distillation_loss": 0.06829311698675156, "epoch": 1.53, "learning_rate": 4.433476357287235e-05, "loss": 0.064, "step": 1614, "task_loss": 0.025221938267350197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7471693126349059, "compression_loss": 0.0, "distillation_loss": 0.29679104685783386, "epoch": 1.53, "learning_rate": 4.432800713850488e-05, "loss": 0.3026, "step": 1615, "task_loss": 0.3548710346221924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.747192582648093, "compression_loss": 0.0, "distillation_loss": 0.05282590910792351, "epoch": 1.53, "learning_rate": 4.432124719311182e-05, "loss": 0.0581, "step": 1616, "task_loss": 0.10553567111492157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7472158458272087, "compression_loss": 0.0, "distillation_loss": 0.07523874193429947, "epoch": 1.54, "learning_rate": 4.431448373792116e-05, "loss": 0.0703, "step": 1617, "task_loss": 0.025965360924601555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7472391021732567, "compression_loss": 0.0, "distillation_loss": 0.24489791691303253, "epoch": 1.54, "learning_rate": 4.430771677416151e-05, "loss": 0.245, "step": 1618, "task_loss": 0.24620510637760162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7472623516872408, "compression_loss": 0.0, "distillation_loss": 0.0306411050260067, "epoch": 1.54, "learning_rate": 4.430094630306212e-05, "loss": 0.0361, "step": 1619, "task_loss": 0.08524684607982635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7472855943701644, "compression_loss": 0.0, "distillation_loss": 0.1247522383928299, "epoch": 1.54, "learning_rate": 4.429417232585288e-05, "loss": 0.1176, "step": 1620, "task_loss": 0.05337625741958618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7473088302230314, "compression_loss": 0.0, "distillation_loss": 0.17402300238609314, "epoch": 1.54, "learning_rate": 4.428739484376431e-05, "loss": 0.1722, "step": 1621, "task_loss": 0.1557716578245163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7473320592468454, "compression_loss": 0.0, "distillation_loss": 0.08097216486930847, "epoch": 1.54, "learning_rate": 4.4280613858027584e-05, "loss": 0.0769, "step": 1622, "task_loss": 0.039864055812358856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7473552814426102, "compression_loss": 0.0, "distillation_loss": 0.2641907334327698, "epoch": 1.54, "learning_rate": 4.427382936987449e-05, "loss": 0.2684, "step": 1623, "task_loss": 0.30657488107681274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7473784968113295, "compression_loss": 0.0, "distillation_loss": 0.30244994163513184, "epoch": 1.54, "learning_rate": 4.426704138053747e-05, "loss": 0.2928, "step": 1624, "task_loss": 0.20630168914794922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7474017053540067, "compression_loss": 0.0, "distillation_loss": 0.08166351914405823, "epoch": 1.54, "learning_rate": 4.426024989124959e-05, "loss": 0.0787, "step": 1625, "task_loss": 0.05223226174712181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7474249070716458, "compression_loss": 0.0, "distillation_loss": 0.044893529266119, "epoch": 1.54, "learning_rate": 4.425345490324456e-05, "loss": 0.052, "step": 1626, "task_loss": 0.11637747287750244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7474481019652502, "compression_loss": 0.0, "distillation_loss": 0.17937017977237701, "epoch": 1.55, "learning_rate": 4.424665641775673e-05, "loss": 0.1818, "step": 1627, "task_loss": 0.20323118567466736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7474712900358239, "compression_loss": 0.0, "distillation_loss": 0.24269092082977295, "epoch": 1.55, "learning_rate": 4.4239854436021056e-05, "loss": 0.2425, "step": 1628, "task_loss": 0.24110905826091766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7474944712843703, "compression_loss": 0.0, "distillation_loss": 0.13650964200496674, "epoch": 1.55, "learning_rate": 4.423304895927317e-05, "loss": 0.1435, "step": 1629, "task_loss": 0.20650048553943634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7475176457118933, "compression_loss": 0.0, "distillation_loss": 0.15174457430839539, "epoch": 1.55, "learning_rate": 4.4226239988749305e-05, "loss": 0.1528, "step": 1630, "task_loss": 0.16226869821548462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7475408133193964, "compression_loss": 0.0, "distillation_loss": 0.17601847648620605, "epoch": 1.55, "learning_rate": 4.4219427525686366e-05, "loss": 0.1722, "step": 1631, "task_loss": 0.13755454123020172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7475639741078833, "compression_loss": 0.0, "distillation_loss": 0.09047359228134155, "epoch": 1.55, "learning_rate": 4.421261157132185e-05, "loss": 0.0868, "step": 1632, "task_loss": 0.053494758903980255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7475871280783579, "compression_loss": 0.0, "distillation_loss": 0.25244152545928955, "epoch": 1.55, "learning_rate": 4.4205792126893905e-05, "loss": 0.2515, "step": 1633, "task_loss": 0.24255844950675964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7476102752318236, "compression_loss": 0.0, "distillation_loss": 0.029148060828447342, "epoch": 1.55, "learning_rate": 4.4198969193641324e-05, "loss": 0.0272, "step": 1634, "task_loss": 0.009713640436530113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7476334155692843, "compression_loss": 0.0, "distillation_loss": 0.1328439861536026, "epoch": 1.55, "learning_rate": 4.4192142772803535e-05, "loss": 0.1342, "step": 1635, "task_loss": 0.14683926105499268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7476565490917435, "compression_loss": 0.0, "distillation_loss": 0.20217649638652802, "epoch": 1.55, "learning_rate": 4.4185312865620575e-05, "loss": 0.192, "step": 1636, "task_loss": 0.1007172167301178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.747679675800205, "compression_loss": 0.0, "distillation_loss": 0.25275999307632446, "epoch": 1.55, "learning_rate": 4.417847947333314e-05, "loss": 0.2413, "step": 1637, "task_loss": 0.1383301168680191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7477027956956724, "compression_loss": 0.0, "distillation_loss": 0.13600948452949524, "epoch": 1.56, "learning_rate": 4.417164259718254e-05, "loss": 0.1313, "step": 1638, "task_loss": 0.08905670791864395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7477259087791495, "compression_loss": 0.0, "distillation_loss": 0.24503761529922485, "epoch": 1.56, "learning_rate": 4.416480223841073e-05, "loss": 0.2425, "step": 1639, "task_loss": 0.21960334479808807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7477490150516399, "compression_loss": 0.0, "distillation_loss": 0.14646950364112854, "epoch": 1.56, "learning_rate": 4.4157958398260294e-05, "loss": 0.147, "step": 1640, "task_loss": 0.1517796665430069 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7477721145141473, "compression_loss": 0.0, "distillation_loss": 0.04808887839317322, "epoch": 1.56, "learning_rate": 4.415111107797445e-05, "loss": 0.0487, "step": 1641, "task_loss": 0.05420362949371338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7477952071676754, "compression_loss": 0.0, "distillation_loss": 0.2355082482099533, "epoch": 1.56, "learning_rate": 4.414426027879705e-05, "loss": 0.2322, "step": 1642, "task_loss": 0.20212513208389282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7478182930132279, "compression_loss": 0.0, "distillation_loss": 0.17253591120243073, "epoch": 1.56, "learning_rate": 4.413740600197257e-05, "loss": 0.1679, "step": 1643, "task_loss": 0.12654927372932434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7478413720518083, "compression_loss": 0.0, "distillation_loss": 0.30063819885253906, "epoch": 1.56, "learning_rate": 4.413054824874612e-05, "loss": 0.2869, "step": 1644, "task_loss": 0.16371384263038635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7478644442844206, "compression_loss": 0.0, "distillation_loss": 0.16720086336135864, "epoch": 1.56, "learning_rate": 4.412368702036345e-05, "loss": 0.1603, "step": 1645, "task_loss": 0.09819523990154266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7478875097120682, "compression_loss": 0.0, "distillation_loss": 0.12360204756259918, "epoch": 1.56, "learning_rate": 4.4116822318070925e-05, "loss": 0.116, "step": 1646, "task_loss": 0.047833651304244995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.747910568335755, "compression_loss": 0.0, "distillation_loss": 0.08152472972869873, "epoch": 1.56, "learning_rate": 4.4109954143115565e-05, "loss": 0.0755, "step": 1647, "task_loss": 0.0212980005890131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7479336201564845, "compression_loss": 0.0, "distillation_loss": 0.11956100910902023, "epoch": 1.57, "learning_rate": 4.4103082496745e-05, "loss": 0.1177, "step": 1648, "task_loss": 0.10143022984266281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7479566651752604, "compression_loss": 0.0, "distillation_loss": 0.13289915025234222, "epoch": 1.57, "learning_rate": 4.40962073802075e-05, "loss": 0.1218, "step": 1649, "task_loss": 0.02191145159304142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7479797033930865, "compression_loss": 0.0, "distillation_loss": 0.31535133719444275, "epoch": 1.57, "learning_rate": 4.4089328794751954e-05, "loss": 0.3183, "step": 1650, "task_loss": 0.3446260094642639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7480027348109664, "compression_loss": 0.0, "distillation_loss": 0.04977913200855255, "epoch": 1.57, "learning_rate": 4.4082446741627906e-05, "loss": 0.0466, "step": 1651, "task_loss": 0.018024973571300507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7480257594299038, "compression_loss": 0.0, "distillation_loss": 0.17800500988960266, "epoch": 1.57, "learning_rate": 4.40755612220855e-05, "loss": 0.1833, "step": 1652, "task_loss": 0.23083500564098358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7480487772509024, "compression_loss": 0.0, "distillation_loss": 0.07639691978693008, "epoch": 1.57, "learning_rate": 4.406867223737553e-05, "loss": 0.085, "step": 1653, "task_loss": 0.16263903677463531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7480717882749659, "compression_loss": 0.0, "distillation_loss": 0.14029891788959503, "epoch": 1.57, "learning_rate": 4.406177978874941e-05, "loss": 0.1392, "step": 1654, "task_loss": 0.12951192259788513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7480947925030978, "compression_loss": 0.0, "distillation_loss": 0.16938380897045135, "epoch": 1.57, "learning_rate": 4.405488387745919e-05, "loss": 0.1714, "step": 1655, "task_loss": 0.19002079963684082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7481177899363021, "compression_loss": 0.0, "distillation_loss": 0.1673574298620224, "epoch": 1.57, "learning_rate": 4.4047984504757544e-05, "loss": 0.1728, "step": 1656, "task_loss": 0.2220241129398346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7481407805755822, "compression_loss": 0.0, "distillation_loss": 0.1588287055492401, "epoch": 1.57, "learning_rate": 4.4041081671897775e-05, "loss": 0.15, "step": 1657, "task_loss": 0.07006968557834625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.748163764421942, "compression_loss": 0.0, "distillation_loss": 0.20809435844421387, "epoch": 1.57, "learning_rate": 4.403417538013382e-05, "loss": 0.2015, "step": 1658, "task_loss": 0.1424122452735901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.748186741476385, "compression_loss": 0.0, "distillation_loss": 0.24815024435520172, "epoch": 1.58, "learning_rate": 4.402726563072024e-05, "loss": 0.2647, "step": 1659, "task_loss": 0.4135337769985199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.748209711739915, "compression_loss": 0.0, "distillation_loss": 0.10159791260957718, "epoch": 1.58, "learning_rate": 4.4020352424912226e-05, "loss": 0.1108, "step": 1660, "task_loss": 0.193673774600029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7482326752135356, "compression_loss": 0.0, "distillation_loss": 0.16284605860710144, "epoch": 1.58, "learning_rate": 4.401343576396558e-05, "loss": 0.1678, "step": 1661, "task_loss": 0.2123304009437561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7482556318982505, "compression_loss": 0.0, "distillation_loss": 0.06474165618419647, "epoch": 1.58, "learning_rate": 4.400651564913676e-05, "loss": 0.0615, "step": 1662, "task_loss": 0.03229294717311859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7482785817950635, "compression_loss": 0.0, "distillation_loss": 0.18344372510910034, "epoch": 1.58, "learning_rate": 4.399959208168284e-05, "loss": 0.1741, "step": 1663, "task_loss": 0.09026458114385605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7483015249049781, "compression_loss": 0.0, "distillation_loss": 0.19920691847801208, "epoch": 1.58, "learning_rate": 4.3992665062861514e-05, "loss": 0.1956, "step": 1664, "task_loss": 0.16354140639305115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7483244612289981, "compression_loss": 0.0, "distillation_loss": 0.09947105497121811, "epoch": 1.58, "learning_rate": 4.398573459393111e-05, "loss": 0.0958, "step": 1665, "task_loss": 0.06266459077596664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7483473907681272, "compression_loss": 0.0, "distillation_loss": 0.12703344225883484, "epoch": 1.58, "learning_rate": 4.3978800676150575e-05, "loss": 0.1194, "step": 1666, "task_loss": 0.050371818244457245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.748370313523369, "compression_loss": 0.0, "distillation_loss": 0.546389639377594, "epoch": 1.58, "learning_rate": 4.39718633107795e-05, "loss": 0.5258, "step": 1667, "task_loss": 0.3403877019882202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7483932294957272, "compression_loss": 0.0, "distillation_loss": 0.0885571613907814, "epoch": 1.58, "learning_rate": 4.3964922499078084e-05, "loss": 0.0837, "step": 1668, "task_loss": 0.03993313014507294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7484161386862055, "compression_loss": 0.0, "distillation_loss": 0.1787647306919098, "epoch": 1.58, "learning_rate": 4.3957978242307166e-05, "loss": 0.175, "step": 1669, "task_loss": 0.14156457781791687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7484390410958076, "compression_loss": 0.0, "distillation_loss": 0.11095503717660904, "epoch": 1.59, "learning_rate": 4.395103054172819e-05, "loss": 0.1135, "step": 1670, "task_loss": 0.13681727647781372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7484619367255372, "compression_loss": 0.0, "distillation_loss": 0.09957759082317352, "epoch": 1.59, "learning_rate": 4.394407939860325e-05, "loss": 0.1075, "step": 1671, "task_loss": 0.17913945019245148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7484848255763978, "compression_loss": 0.0, "distillation_loss": 0.14932695031166077, "epoch": 1.59, "learning_rate": 4.3937124814195054e-05, "loss": 0.1552, "step": 1672, "task_loss": 0.20759890973567963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7485077076493933, "compression_loss": 0.0, "distillation_loss": 0.11619941145181656, "epoch": 1.59, "learning_rate": 4.393016678976692e-05, "loss": 0.1115, "step": 1673, "task_loss": 0.06885246187448502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7485305829455273, "compression_loss": 0.0, "distillation_loss": 0.12013675272464752, "epoch": 1.59, "learning_rate": 4.3923205326582837e-05, "loss": 0.1239, "step": 1674, "task_loss": 0.15799427032470703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7485534514658035, "compression_loss": 0.0, "distillation_loss": 0.08647421002388, "epoch": 1.59, "learning_rate": 4.3916240425907364e-05, "loss": 0.0989, "step": 1675, "task_loss": 0.2103899121284485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7485763132112256, "compression_loss": 0.0, "distillation_loss": 0.25060856342315674, "epoch": 1.59, "learning_rate": 4.3909272089005714e-05, "loss": 0.2436, "step": 1676, "task_loss": 0.18063423037528992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7485991681827973, "compression_loss": 0.0, "distillation_loss": 0.23684711754322052, "epoch": 1.59, "learning_rate": 4.3902300317143726e-05, "loss": 0.2305, "step": 1677, "task_loss": 0.17349837720394135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7486220163815221, "compression_loss": 0.0, "distillation_loss": 0.2393345683813095, "epoch": 1.59, "learning_rate": 4.389532511158785e-05, "loss": 0.2251, "step": 1678, "task_loss": 0.09700442850589752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.748644857808404, "compression_loss": 0.0, "distillation_loss": 0.1394980251789093, "epoch": 1.59, "learning_rate": 4.388834647360516e-05, "loss": 0.1531, "step": 1679, "task_loss": 0.2758055329322815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7486676924644463, "compression_loss": 0.0, "distillation_loss": 0.08193551748991013, "epoch": 1.6, "learning_rate": 4.388136440446337e-05, "loss": 0.0893, "step": 1680, "task_loss": 0.15590821206569672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.748690520350653, "compression_loss": 0.0, "distillation_loss": 0.032735083252191544, "epoch": 1.6, "learning_rate": 4.387437890543081e-05, "loss": 0.0302, "step": 1681, "task_loss": 0.007712380960583687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7487133414680276, "compression_loss": 0.0, "distillation_loss": 0.2094217985868454, "epoch": 1.6, "learning_rate": 4.3867389977776416e-05, "loss": 0.2026, "step": 1682, "task_loss": 0.14127467572689056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7487361558175739, "compression_loss": 0.0, "distillation_loss": 0.14119529724121094, "epoch": 1.6, "learning_rate": 4.3860397622769756e-05, "loss": 0.1352, "step": 1683, "task_loss": 0.08111706376075745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7487589634002955, "compression_loss": 0.0, "distillation_loss": 0.2153528779745102, "epoch": 1.6, "learning_rate": 4.3853401841681046e-05, "loss": 0.2241, "step": 1684, "task_loss": 0.3023759126663208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7487817642171961, "compression_loss": 0.0, "distillation_loss": 0.050505850464105606, "epoch": 1.6, "learning_rate": 4.3846402635781093e-05, "loss": 0.0562, "step": 1685, "task_loss": 0.1069660410284996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7488045582692795, "compression_loss": 0.0, "distillation_loss": 0.1630827784538269, "epoch": 1.6, "learning_rate": 4.3839400006341335e-05, "loss": 0.1598, "step": 1686, "task_loss": 0.1302591860294342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7488273455575492, "compression_loss": 0.0, "distillation_loss": 0.03398447483778, "epoch": 1.6, "learning_rate": 4.383239395463383e-05, "loss": 0.0397, "step": 1687, "task_loss": 0.090923011302948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7488501260830089, "compression_loss": 0.0, "distillation_loss": 0.06578654050827026, "epoch": 1.6, "learning_rate": 4.382538448193127e-05, "loss": 0.0629, "step": 1688, "task_loss": 0.037396807223558426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7488728998466624, "compression_loss": 0.0, "distillation_loss": 0.11456318199634552, "epoch": 1.6, "learning_rate": 4.381837158950695e-05, "loss": 0.1149, "step": 1689, "task_loss": 0.11771449446678162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7488956668495133, "compression_loss": 0.0, "distillation_loss": 0.09141987562179565, "epoch": 1.6, "learning_rate": 4.3811355278634804e-05, "loss": 0.0861, "step": 1690, "task_loss": 0.03799279406666756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7489184270925653, "compression_loss": 0.0, "distillation_loss": 0.07688391953706741, "epoch": 1.61, "learning_rate": 4.380433555058937e-05, "loss": 0.0735, "step": 1691, "task_loss": 0.04309658333659172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7489411805768221, "compression_loss": 0.0, "distillation_loss": 0.24183808267116547, "epoch": 1.61, "learning_rate": 4.379731240664583e-05, "loss": 0.2428, "step": 1692, "task_loss": 0.251809298992157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7489639273032873, "compression_loss": 0.0, "distillation_loss": 0.04763605445623398, "epoch": 1.61, "learning_rate": 4.379028584807996e-05, "loss": 0.0519, "step": 1693, "task_loss": 0.09006030112504959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7489866672729648, "compression_loss": 0.0, "distillation_loss": 0.26702186465263367, "epoch": 1.61, "learning_rate": 4.3783255876168165e-05, "loss": 0.2575, "step": 1694, "task_loss": 0.17218661308288574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.749009400486858, "compression_loss": 0.0, "distillation_loss": 0.1826254427433014, "epoch": 1.61, "learning_rate": 4.377622249218748e-05, "loss": 0.1761, "step": 1695, "task_loss": 0.11716414242982864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7490321269459708, "compression_loss": 0.0, "distillation_loss": 0.06596788763999939, "epoch": 1.61, "learning_rate": 4.376918569741556e-05, "loss": 0.078, "step": 1696, "task_loss": 0.18661688268184662 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7490548466513067, "compression_loss": 0.0, "distillation_loss": 0.07024285942316055, "epoch": 1.61, "learning_rate": 4.376214549313066e-05, "loss": 0.0653, "step": 1697, "task_loss": 0.020744740962982178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7490775596038696, "compression_loss": 0.0, "distillation_loss": 0.13795125484466553, "epoch": 1.61, "learning_rate": 4.375510188061167e-05, "loss": 0.1382, "step": 1698, "task_loss": 0.14067493379116058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.749100265804663, "compression_loss": 0.0, "distillation_loss": 0.06560136377811432, "epoch": 1.61, "learning_rate": 4.37480548611381e-05, "loss": 0.0745, "step": 1699, "task_loss": 0.15437015891075134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7491229652546907, "compression_loss": 0.0, "distillation_loss": 0.10080569982528687, "epoch": 1.61, "learning_rate": 4.374100443599007e-05, "loss": 0.0956, "step": 1700, "task_loss": 0.04915327578783035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7491456579549562, "compression_loss": 0.0, "distillation_loss": 0.151437908411026, "epoch": 1.62, "learning_rate": 4.3733950606448324e-05, "loss": 0.1444, "step": 1701, "task_loss": 0.08063948154449463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7491683439064634, "compression_loss": 0.0, "distillation_loss": 0.14637860655784607, "epoch": 1.62, "learning_rate": 4.3726893373794234e-05, "loss": 0.1415, "step": 1702, "task_loss": 0.09764857590198517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7491910231102158, "compression_loss": 0.0, "distillation_loss": 0.18097403645515442, "epoch": 1.62, "learning_rate": 4.3719832739309766e-05, "loss": 0.1771, "step": 1703, "task_loss": 0.1424616128206253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7492136955672173, "compression_loss": 0.0, "distillation_loss": 0.22387400269508362, "epoch": 1.62, "learning_rate": 4.371276870427753e-05, "loss": 0.2137, "step": 1704, "task_loss": 0.12188294529914856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7492363612784714, "compression_loss": 0.0, "distillation_loss": 0.09288572520017624, "epoch": 1.62, "learning_rate": 4.3705701269980734e-05, "loss": 0.0888, "step": 1705, "task_loss": 0.05178219825029373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7492590202449818, "compression_loss": 0.0, "distillation_loss": 0.05206472426652908, "epoch": 1.62, "learning_rate": 4.369863043770322e-05, "loss": 0.048, "step": 1706, "task_loss": 0.01116347685456276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7492816724677522, "compression_loss": 0.0, "distillation_loss": 0.11153702437877655, "epoch": 1.62, "learning_rate": 4.369155620872943e-05, "loss": 0.1047, "step": 1707, "task_loss": 0.04338126629590988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7493043179477863, "compression_loss": 0.0, "distillation_loss": 0.15479451417922974, "epoch": 1.62, "learning_rate": 4.3684478584344433e-05, "loss": 0.1515, "step": 1708, "task_loss": 0.12206155061721802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7493269566860878, "compression_loss": 0.0, "distillation_loss": 0.05233832448720932, "epoch": 1.62, "learning_rate": 4.367739756583392e-05, "loss": 0.0498, "step": 1709, "task_loss": 0.026678021997213364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7493495886836604, "compression_loss": 0.0, "distillation_loss": 0.05933618173003197, "epoch": 1.62, "learning_rate": 4.367031315448419e-05, "loss": 0.0733, "step": 1710, "task_loss": 0.19880297780036926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7493722139415077, "compression_loss": 0.0, "distillation_loss": 0.1785992830991745, "epoch": 1.62, "learning_rate": 4.366322535158215e-05, "loss": 0.1674, "step": 1711, "task_loss": 0.06677613407373428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7493948324606334, "compression_loss": 0.0, "distillation_loss": 0.05169357359409332, "epoch": 1.63, "learning_rate": 4.3656134158415344e-05, "loss": 0.0492, "step": 1712, "task_loss": 0.026786495000123978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7494174442420414, "compression_loss": 0.0, "distillation_loss": 0.09955106675624847, "epoch": 1.63, "learning_rate": 4.364903957627192e-05, "loss": 0.0959, "step": 1713, "task_loss": 0.06332787871360779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.749440049286735, "compression_loss": 0.0, "distillation_loss": 0.20070070028305054, "epoch": 1.63, "learning_rate": 4.3641941606440644e-05, "loss": 0.1948, "step": 1714, "task_loss": 0.14132705330848694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7494626475957181, "compression_loss": 0.0, "distillation_loss": 0.07433759421110153, "epoch": 1.63, "learning_rate": 4.36348402502109e-05, "loss": 0.0819, "step": 1715, "task_loss": 0.15012231469154358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7494852391699944, "compression_loss": 0.0, "distillation_loss": 0.13596729934215546, "epoch": 1.63, "learning_rate": 4.3627735508872666e-05, "loss": 0.1536, "step": 1716, "task_loss": 0.31197068095207214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7495078240105675, "compression_loss": 0.0, "distillation_loss": 0.2269076108932495, "epoch": 1.63, "learning_rate": 4.362062738371657e-05, "loss": 0.2384, "step": 1717, "task_loss": 0.3419533371925354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7495304021184411, "compression_loss": 0.0, "distillation_loss": 0.20675890147686005, "epoch": 1.63, "learning_rate": 4.361351587603384e-05, "loss": 0.2075, "step": 1718, "task_loss": 0.21458357572555542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7495529734946189, "compression_loss": 0.0, "distillation_loss": 0.10246886312961578, "epoch": 1.63, "learning_rate": 4.360640098711629e-05, "loss": 0.0964, "step": 1719, "task_loss": 0.04224860668182373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7495755381401047, "compression_loss": 0.0, "distillation_loss": 0.250265896320343, "epoch": 1.63, "learning_rate": 4.3599282718256406e-05, "loss": 0.2593, "step": 1720, "task_loss": 0.34033694863319397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.749598096055902, "compression_loss": 0.0, "distillation_loss": 0.11491803824901581, "epoch": 1.63, "learning_rate": 4.3592161070747233e-05, "loss": 0.1297, "step": 1721, "task_loss": 0.2628564238548279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7496206472430145, "compression_loss": 0.0, "distillation_loss": 0.05386704206466675, "epoch": 1.64, "learning_rate": 4.358503604588247e-05, "loss": 0.0536, "step": 1722, "task_loss": 0.050750650465488434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.749643191702446, "compression_loss": 0.0, "distillation_loss": 0.35980457067489624, "epoch": 1.64, "learning_rate": 4.357790764495639e-05, "loss": 0.3559, "step": 1723, "task_loss": 0.3210410475730896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7496657294352, "compression_loss": 0.0, "distillation_loss": 0.08513958752155304, "epoch": 1.64, "learning_rate": 4.357077586926392e-05, "loss": 0.0913, "step": 1724, "task_loss": 0.14632166922092438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7496882604422804, "compression_loss": 0.0, "distillation_loss": 0.16130788624286652, "epoch": 1.64, "learning_rate": 4.356364072010059e-05, "loss": 0.1687, "step": 1725, "task_loss": 0.23540650308132172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7497107847246908, "compression_loss": 0.0, "distillation_loss": 0.10402365028858185, "epoch": 1.64, "learning_rate": 4.3556502198762496e-05, "loss": 0.1023, "step": 1726, "task_loss": 0.08679551631212234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7497333022834348, "compression_loss": 0.0, "distillation_loss": 0.07432233542203903, "epoch": 1.64, "learning_rate": 4.354936030654642e-05, "loss": 0.0807, "step": 1727, "task_loss": 0.13773983716964722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7497558131195161, "compression_loss": 0.0, "distillation_loss": 0.2607779800891876, "epoch": 1.64, "learning_rate": 4.3542215044749705e-05, "loss": 0.2518, "step": 1728, "task_loss": 0.1711687445640564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7497783172339385, "compression_loss": 0.0, "distillation_loss": 0.059374839067459106, "epoch": 1.64, "learning_rate": 4.3535066414670336e-05, "loss": 0.0628, "step": 1729, "task_loss": 0.09339209645986557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7498008146277056, "compression_loss": 0.0, "distillation_loss": 0.20149439573287964, "epoch": 1.64, "learning_rate": 4.352791441760687e-05, "loss": 0.1973, "step": 1730, "task_loss": 0.15943476557731628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7498233053018211, "compression_loss": 0.0, "distillation_loss": 0.13185396790504456, "epoch": 1.64, "learning_rate": 4.352075905485854e-05, "loss": 0.1234, "step": 1731, "task_loss": 0.04712344706058502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7498457892572886, "compression_loss": 0.0, "distillation_loss": 0.1501794159412384, "epoch": 1.64, "learning_rate": 4.351360032772512e-05, "loss": 0.1726, "step": 1732, "task_loss": 0.3744635581970215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.749868266495112, "compression_loss": 0.0, "distillation_loss": 0.07114310562610626, "epoch": 1.65, "learning_rate": 4.3506438237507033e-05, "loss": 0.0689, "step": 1733, "task_loss": 0.048517607152462006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7498907370162946, "compression_loss": 0.0, "distillation_loss": 0.13893577456474304, "epoch": 1.65, "learning_rate": 4.3499272785505316e-05, "loss": 0.159, "step": 1734, "task_loss": 0.33932289481163025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7499132008218404, "compression_loss": 0.0, "distillation_loss": 0.14102880656719208, "epoch": 1.65, "learning_rate": 4.349210397302161e-05, "loss": 0.1395, "step": 1735, "task_loss": 0.12529143691062927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7499356579127531, "compression_loss": 0.0, "distillation_loss": 0.2732577919960022, "epoch": 1.65, "learning_rate": 4.348493180135815e-05, "loss": 0.2615, "step": 1736, "task_loss": 0.15519018471240997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7499581082900362, "compression_loss": 0.0, "distillation_loss": 0.061276476830244064, "epoch": 1.65, "learning_rate": 4.347775627181782e-05, "loss": 0.0631, "step": 1737, "task_loss": 0.07924476265907288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7499805519546935, "compression_loss": 0.0, "distillation_loss": 0.20356178283691406, "epoch": 1.65, "learning_rate": 4.3470577385704056e-05, "loss": 0.2137, "step": 1738, "task_loss": 0.3052142560482025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7500029889077287, "compression_loss": 0.0, "distillation_loss": 0.24523907899856567, "epoch": 1.65, "learning_rate": 4.346339514432096e-05, "loss": 0.2379, "step": 1739, "task_loss": 0.17217698693275452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7500254191501452, "compression_loss": 0.0, "distillation_loss": 0.14814287424087524, "epoch": 1.65, "learning_rate": 4.345620954897322e-05, "loss": 0.1381, "step": 1740, "task_loss": 0.04762396588921547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7500478426829471, "compression_loss": 0.0, "distillation_loss": 0.08230330049991608, "epoch": 1.65, "learning_rate": 4.344902060096612e-05, "loss": 0.0829, "step": 1741, "task_loss": 0.08790513873100281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7500702595071379, "compression_loss": 0.0, "distillation_loss": 0.12028224766254425, "epoch": 1.65, "learning_rate": 4.344182830160558e-05, "loss": 0.1143, "step": 1742, "task_loss": 0.060103029012680054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7500926696237211, "compression_loss": 0.0, "distillation_loss": 0.12078093737363815, "epoch": 1.66, "learning_rate": 4.343463265219811e-05, "loss": 0.115, "step": 1743, "task_loss": 0.06334509700536728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7501150730337007, "compression_loss": 0.0, "distillation_loss": 0.10595209896564484, "epoch": 1.66, "learning_rate": 4.342743365405084e-05, "loss": 0.1103, "step": 1744, "task_loss": 0.14954574406147003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7501374697380803, "compression_loss": 0.0, "distillation_loss": 0.16210979223251343, "epoch": 1.66, "learning_rate": 4.3420231308471496e-05, "loss": 0.1555, "step": 1745, "task_loss": 0.09629593789577484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7501598597378634, "compression_loss": 0.0, "distillation_loss": 0.185985267162323, "epoch": 1.66, "learning_rate": 4.3413025616768424e-05, "loss": 0.1858, "step": 1746, "task_loss": 0.1842183768749237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7501822430340538, "compression_loss": 0.0, "distillation_loss": 0.1382254958152771, "epoch": 1.66, "learning_rate": 4.340581658025058e-05, "loss": 0.1359, "step": 1747, "task_loss": 0.11496403813362122 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7502046196276552, "compression_loss": 0.0, "distillation_loss": 0.0522196963429451, "epoch": 1.66, "learning_rate": 4.33986042002275e-05, "loss": 0.058, "step": 1748, "task_loss": 0.10959599912166595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7502269895196713, "compression_loss": 0.0, "distillation_loss": 0.045316457748413086, "epoch": 1.66, "learning_rate": 4.339138847800936e-05, "loss": 0.0488, "step": 1749, "task_loss": 0.08048881590366364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7502493527111057, "compression_loss": 0.0, "distillation_loss": 0.11536607146263123, "epoch": 1.66, "learning_rate": 4.3384169414906925e-05, "loss": 0.1198, "step": 1750, "task_loss": 0.1599726378917694 }, { "epoch": 1.66, "eval_accuracy": 0.9002293577981652, "eval_loss": 0.3980746567249298, "eval_runtime": 18.3137, "eval_samples_per_second": 47.615, "eval_steps_per_second": 5.952, "step": 1750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7502717092029622, "compression_loss": 0.0, "distillation_loss": 0.22586020827293396, "epoch": 1.66, "learning_rate": 4.3376947012231586e-05, "loss": 0.214, "step": 1751, "task_loss": 0.10768207907676697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7502940589962444, "compression_loss": 0.0, "distillation_loss": 0.10226061195135117, "epoch": 1.66, "learning_rate": 4.336972127129532e-05, "loss": 0.1036, "step": 1752, "task_loss": 0.11605414748191833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7503164020919558, "compression_loss": 0.0, "distillation_loss": 0.2922346591949463, "epoch": 1.66, "learning_rate": 4.3362492193410705e-05, "loss": 0.2796, "step": 1753, "task_loss": 0.1663048416376114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7503387384911004, "compression_loss": 0.0, "distillation_loss": 0.05565609037876129, "epoch": 1.67, "learning_rate": 4.335525977989095e-05, "loss": 0.0522, "step": 1754, "task_loss": 0.020734498277306557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7503610681946818, "compression_loss": 0.0, "distillation_loss": 0.19371697306632996, "epoch": 1.67, "learning_rate": 4.334802403204986e-05, "loss": 0.1989, "step": 1755, "task_loss": 0.2459201216697693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7503833912037037, "compression_loss": 0.0, "distillation_loss": 0.12561503052711487, "epoch": 1.67, "learning_rate": 4.334078495120184e-05, "loss": 0.1226, "step": 1756, "task_loss": 0.0955725908279419 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7504057075191697, "compression_loss": 0.0, "distillation_loss": 0.25841909646987915, "epoch": 1.67, "learning_rate": 4.33335425386619e-05, "loss": 0.2526, "step": 1757, "task_loss": 0.2000465989112854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7504280171420834, "compression_loss": 0.0, "distillation_loss": 0.18024028837680817, "epoch": 1.67, "learning_rate": 4.332629679574566e-05, "loss": 0.1706, "step": 1758, "task_loss": 0.08373329043388367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7504503200734487, "compression_loss": 0.0, "distillation_loss": 0.21449556946754456, "epoch": 1.67, "learning_rate": 4.331904772376935e-05, "loss": 0.2073, "step": 1759, "task_loss": 0.14211583137512207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7504726163142691, "compression_loss": 0.0, "distillation_loss": 0.17756842076778412, "epoch": 1.67, "learning_rate": 4.3311795324049795e-05, "loss": 0.1765, "step": 1760, "task_loss": 0.16666541993618011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7504949058655483, "compression_loss": 0.0, "distillation_loss": 0.1488828957080841, "epoch": 1.67, "learning_rate": 4.3304539597904435e-05, "loss": 0.1445, "step": 1761, "task_loss": 0.10537352412939072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7505171887282901, "compression_loss": 0.0, "distillation_loss": 0.18257030844688416, "epoch": 1.67, "learning_rate": 4.3297280546651295e-05, "loss": 0.185, "step": 1762, "task_loss": 0.20700550079345703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7505394649034981, "compression_loss": 0.0, "distillation_loss": 0.04670891910791397, "epoch": 1.67, "learning_rate": 4.329001817160903e-05, "loss": 0.05, "step": 1763, "task_loss": 0.07921046018600464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.750561734392176, "compression_loss": 0.0, "distillation_loss": 0.15156331658363342, "epoch": 1.68, "learning_rate": 4.3282752474096864e-05, "loss": 0.1615, "step": 1764, "task_loss": 0.2507190704345703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7505839971953274, "compression_loss": 0.0, "distillation_loss": 0.23599442839622498, "epoch": 1.68, "learning_rate": 4.327548345543467e-05, "loss": 0.2256, "step": 1765, "task_loss": 0.13191649317741394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7506062533139561, "compression_loss": 0.0, "distillation_loss": 0.15651676058769226, "epoch": 1.68, "learning_rate": 4.326821111694289e-05, "loss": 0.1509, "step": 1766, "task_loss": 0.10070617496967316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7506285027490659, "compression_loss": 0.0, "distillation_loss": 0.14649711549282074, "epoch": 1.68, "learning_rate": 4.3260935459942584e-05, "loss": 0.144, "step": 1767, "task_loss": 0.12118849158287048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7506507455016602, "compression_loss": 0.0, "distillation_loss": 0.05859662592411041, "epoch": 1.68, "learning_rate": 4.32536564857554e-05, "loss": 0.0547, "step": 1768, "task_loss": 0.020115777850151062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7506729815727428, "compression_loss": 0.0, "distillation_loss": 0.24223226308822632, "epoch": 1.68, "learning_rate": 4.3246374195703604e-05, "loss": 0.2509, "step": 1769, "task_loss": 0.32900571823120117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7506952109633174, "compression_loss": 0.0, "distillation_loss": 0.11694598197937012, "epoch": 1.68, "learning_rate": 4.3239088591110065e-05, "loss": 0.1294, "step": 1770, "task_loss": 0.2414626181125641 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7507174336743877, "compression_loss": 0.0, "distillation_loss": 0.23970989882946014, "epoch": 1.68, "learning_rate": 4.323179967329824e-05, "loss": 0.2291, "step": 1771, "task_loss": 0.13375961780548096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7507396497069573, "compression_loss": 0.0, "distillation_loss": 0.050326522439718246, "epoch": 1.68, "learning_rate": 4.3224507443592196e-05, "loss": 0.0593, "step": 1772, "task_loss": 0.13998878002166748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.75076185906203, "compression_loss": 0.0, "distillation_loss": 0.23953363299369812, "epoch": 1.68, "learning_rate": 4.321721190331661e-05, "loss": 0.2254, "step": 1773, "task_loss": 0.09803837537765503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7507840617406093, "compression_loss": 0.0, "distillation_loss": 0.12072421610355377, "epoch": 1.68, "learning_rate": 4.3209913053796746e-05, "loss": 0.1307, "step": 1774, "task_loss": 0.2204258143901825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7508062577436991, "compression_loss": 0.0, "distillation_loss": 0.2765160799026489, "epoch": 1.69, "learning_rate": 4.3202610896358474e-05, "loss": 0.2627, "step": 1775, "task_loss": 0.13829368352890015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7508284470723029, "compression_loss": 0.0, "distillation_loss": 0.16436317563056946, "epoch": 1.69, "learning_rate": 4.319530543232827e-05, "loss": 0.1646, "step": 1776, "task_loss": 0.16645075380802155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7508506297274246, "compression_loss": 0.0, "distillation_loss": 0.1174478605389595, "epoch": 1.69, "learning_rate": 4.31879966630332e-05, "loss": 0.1172, "step": 1777, "task_loss": 0.11536243557929993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7508728057100676, "compression_loss": 0.0, "distillation_loss": 0.33932632207870483, "epoch": 1.69, "learning_rate": 4.318068458980095e-05, "loss": 0.3292, "step": 1778, "task_loss": 0.23805338144302368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7508949750212358, "compression_loss": 0.0, "distillation_loss": 0.08245585113763809, "epoch": 1.69, "learning_rate": 4.317336921395978e-05, "loss": 0.0889, "step": 1779, "task_loss": 0.14642596244812012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7509171376619328, "compression_loss": 0.0, "distillation_loss": 0.32606542110443115, "epoch": 1.69, "learning_rate": 4.316605053683856e-05, "loss": 0.3143, "step": 1780, "task_loss": 0.20817196369171143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7509392936331623, "compression_loss": 0.0, "distillation_loss": 0.10882005095481873, "epoch": 1.69, "learning_rate": 4.3158728559766786e-05, "loss": 0.1177, "step": 1781, "task_loss": 0.1978674679994583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7509614429359279, "compression_loss": 0.0, "distillation_loss": 0.0932815819978714, "epoch": 1.69, "learning_rate": 4.315140328407451e-05, "loss": 0.0889, "step": 1782, "task_loss": 0.0497298426926136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7509835855712335, "compression_loss": 0.0, "distillation_loss": 0.12256644666194916, "epoch": 1.69, "learning_rate": 4.314407471109241e-05, "loss": 0.1198, "step": 1783, "task_loss": 0.0945819541811943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7510057215400826, "compression_loss": 0.0, "distillation_loss": 0.09638384729623795, "epoch": 1.69, "learning_rate": 4.313674284215176e-05, "loss": 0.0956, "step": 1784, "task_loss": 0.08847030997276306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7510278508434789, "compression_loss": 0.0, "distillation_loss": 0.15519554913043976, "epoch": 1.7, "learning_rate": 4.312940767858441e-05, "loss": 0.1552, "step": 1785, "task_loss": 0.15559692680835724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7510499734824261, "compression_loss": 0.0, "distillation_loss": 0.09053555130958557, "epoch": 1.7, "learning_rate": 4.312206922172286e-05, "loss": 0.0836, "step": 1786, "task_loss": 0.021591845899820328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7510720894579278, "compression_loss": 0.0, "distillation_loss": 0.0775122344493866, "epoch": 1.7, "learning_rate": 4.311472747290015e-05, "loss": 0.0729, "step": 1787, "task_loss": 0.03092704340815544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7510941987709878, "compression_loss": 0.0, "distillation_loss": 0.20807501673698425, "epoch": 1.7, "learning_rate": 4.310738243344996e-05, "loss": 0.2011, "step": 1788, "task_loss": 0.1382240504026413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7511163014226098, "compression_loss": 0.0, "distillation_loss": 0.13786278665065765, "epoch": 1.7, "learning_rate": 4.310003410470653e-05, "loss": 0.137, "step": 1789, "task_loss": 0.12924642860889435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7511383974137974, "compression_loss": 0.0, "distillation_loss": 0.03874251991510391, "epoch": 1.7, "learning_rate": 4.309268248800476e-05, "loss": 0.0455, "step": 1790, "task_loss": 0.10590145736932755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7511604867455544, "compression_loss": 0.0, "distillation_loss": 0.16585403680801392, "epoch": 1.7, "learning_rate": 4.3085327584680056e-05, "loss": 0.1747, "step": 1791, "task_loss": 0.2540725767612457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7511825694188843, "compression_loss": 0.0, "distillation_loss": 0.17077066004276276, "epoch": 1.7, "learning_rate": 4.3077969396068505e-05, "loss": 0.172, "step": 1792, "task_loss": 0.18328894674777985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7512046454347909, "compression_loss": 0.0, "distillation_loss": 0.23324266076087952, "epoch": 1.7, "learning_rate": 4.307060792350675e-05, "loss": 0.2235, "step": 1793, "task_loss": 0.13554205000400543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7512267147942778, "compression_loss": 0.0, "distillation_loss": 0.06733982264995575, "epoch": 1.7, "learning_rate": 4.306324316833203e-05, "loss": 0.0625, "step": 1794, "task_loss": 0.019262997433543205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7512487774983488, "compression_loss": 0.0, "distillation_loss": 0.1413702815771103, "epoch": 1.7, "learning_rate": 4.3055875131882204e-05, "loss": 0.1334, "step": 1795, "task_loss": 0.061707641929388046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7512708335480076, "compression_loss": 0.0, "distillation_loss": 0.1853601038455963, "epoch": 1.71, "learning_rate": 4.30485038154957e-05, "loss": 0.1838, "step": 1796, "task_loss": 0.16993498802185059 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7512928829442577, "compression_loss": 0.0, "distillation_loss": 0.15853723883628845, "epoch": 1.71, "learning_rate": 4.304112922051155e-05, "loss": 0.1542, "step": 1797, "task_loss": 0.11558166146278381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7513149256881029, "compression_loss": 0.0, "distillation_loss": 0.1695374995470047, "epoch": 1.71, "learning_rate": 4.30337513482694e-05, "loss": 0.1642, "step": 1798, "task_loss": 0.11586904525756836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7513369617805469, "compression_loss": 0.0, "distillation_loss": 0.05897611379623413, "epoch": 1.71, "learning_rate": 4.3026370200109463e-05, "loss": 0.0712, "step": 1799, "task_loss": 0.18152813613414764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7513589912225933, "compression_loss": 0.0, "distillation_loss": 0.12656652927398682, "epoch": 1.71, "learning_rate": 4.301898577737255e-05, "loss": 0.1314, "step": 1800, "task_loss": 0.17490315437316895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7513810140152459, "compression_loss": 0.0, "distillation_loss": 0.05648940056562424, "epoch": 1.71, "learning_rate": 4.3011598081400105e-05, "loss": 0.0567, "step": 1801, "task_loss": 0.05847236514091492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7514030301595083, "compression_loss": 0.0, "distillation_loss": 0.20593807101249695, "epoch": 1.71, "learning_rate": 4.3004207113534124e-05, "loss": 0.1972, "step": 1802, "task_loss": 0.1184128075838089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7514250396563842, "compression_loss": 0.0, "distillation_loss": 0.11276452243328094, "epoch": 1.71, "learning_rate": 4.2996812875117206e-05, "loss": 0.1387, "step": 1803, "task_loss": 0.3719395101070404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7514470425068772, "compression_loss": 0.0, "distillation_loss": 0.23946917057037354, "epoch": 1.71, "learning_rate": 4.2989415367492556e-05, "loss": 0.2358, "step": 1804, "task_loss": 0.20264212787151337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7514690387119911, "compression_loss": 0.0, "distillation_loss": 0.14112165570259094, "epoch": 1.71, "learning_rate": 4.298201459200397e-05, "loss": 0.1457, "step": 1805, "task_loss": 0.1867811530828476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7514910282727296, "compression_loss": 0.0, "distillation_loss": 0.18797874450683594, "epoch": 1.72, "learning_rate": 4.2974610549995834e-05, "loss": 0.187, "step": 1806, "task_loss": 0.17826765775680542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7515130111900963, "compression_loss": 0.0, "distillation_loss": 0.16024428606033325, "epoch": 1.72, "learning_rate": 4.296720324281311e-05, "loss": 0.157, "step": 1807, "task_loss": 0.12820284068584442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.751534987465095, "compression_loss": 0.0, "distillation_loss": 0.16456547379493713, "epoch": 1.72, "learning_rate": 4.29597926718014e-05, "loss": 0.1509, "step": 1808, "task_loss": 0.028283601626753807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.751556957098729, "compression_loss": 0.0, "distillation_loss": 0.085496686398983, "epoch": 1.72, "learning_rate": 4.295237883830685e-05, "loss": 0.0912, "step": 1809, "task_loss": 0.14275582134723663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7515789200920026, "compression_loss": 0.0, "distillation_loss": 0.24256683886051178, "epoch": 1.72, "learning_rate": 4.294496174367623e-05, "loss": 0.2418, "step": 1810, "task_loss": 0.23468558490276337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.751600876445919, "compression_loss": 0.0, "distillation_loss": 0.08293971419334412, "epoch": 1.72, "learning_rate": 4.2937541389256877e-05, "loss": 0.0832, "step": 1811, "task_loss": 0.08516831696033478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.751622826161482, "compression_loss": 0.0, "distillation_loss": 0.15826871991157532, "epoch": 1.72, "learning_rate": 4.293011777639675e-05, "loss": 0.1666, "step": 1812, "task_loss": 0.2412494570016861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7516447692396955, "compression_loss": 0.0, "distillation_loss": 0.16091185808181763, "epoch": 1.72, "learning_rate": 4.2922690906444374e-05, "loss": 0.1583, "step": 1813, "task_loss": 0.13493165373802185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7516667056815628, "compression_loss": 0.0, "distillation_loss": 0.06874893605709076, "epoch": 1.72, "learning_rate": 4.291526078074888e-05, "loss": 0.0689, "step": 1814, "task_loss": 0.07061054557561874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7516886354880878, "compression_loss": 0.0, "distillation_loss": 0.1136234849691391, "epoch": 1.72, "learning_rate": 4.290782740065997e-05, "loss": 0.1169, "step": 1815, "task_loss": 0.14640654623508453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7517105586602743, "compression_loss": 0.0, "distillation_loss": 0.14775055646896362, "epoch": 1.72, "learning_rate": 4.290039076752799e-05, "loss": 0.1389, "step": 1816, "task_loss": 0.059689588844776154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7517324751991258, "compression_loss": 0.0, "distillation_loss": 0.20547989010810852, "epoch": 1.73, "learning_rate": 4.28929508827038e-05, "loss": 0.2006, "step": 1817, "task_loss": 0.15708599984645844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.751754385105646, "compression_loss": 0.0, "distillation_loss": 0.03952891379594803, "epoch": 1.73, "learning_rate": 4.288550774753892e-05, "loss": 0.0444, "step": 1818, "task_loss": 0.08782260119915009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7517762883808385, "compression_loss": 0.0, "distillation_loss": 0.0741434320807457, "epoch": 1.73, "learning_rate": 4.2878061363385414e-05, "loss": 0.0734, "step": 1819, "task_loss": 0.06647836416959763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7517981850257073, "compression_loss": 0.0, "distillation_loss": 0.08467371761798859, "epoch": 1.73, "learning_rate": 4.287061173159597e-05, "loss": 0.0828, "step": 1820, "task_loss": 0.06570696830749512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7518200750412558, "compression_loss": 0.0, "distillation_loss": 0.14830920100212097, "epoch": 1.73, "learning_rate": 4.286315885352382e-05, "loss": 0.1399, "step": 1821, "task_loss": 0.06412569433450699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7518419584284877, "compression_loss": 0.0, "distillation_loss": 0.13605275750160217, "epoch": 1.73, "learning_rate": 4.285570273052285e-05, "loss": 0.1302, "step": 1822, "task_loss": 0.07789816707372665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7518638351884068, "compression_loss": 0.0, "distillation_loss": 0.12208747863769531, "epoch": 1.73, "learning_rate": 4.2848243363947484e-05, "loss": 0.1132, "step": 1823, "task_loss": 0.032726749777793884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7518857053220167, "compression_loss": 0.0, "distillation_loss": 0.08085359632968903, "epoch": 1.73, "learning_rate": 4.2840780755152746e-05, "loss": 0.0879, "step": 1824, "task_loss": 0.1517452895641327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7519075688303211, "compression_loss": 0.0, "distillation_loss": 0.22030451893806458, "epoch": 1.73, "learning_rate": 4.283331490549426e-05, "loss": 0.2232, "step": 1825, "task_loss": 0.24884700775146484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7519294257143236, "compression_loss": 0.0, "distillation_loss": 0.23822423815727234, "epoch": 1.73, "learning_rate": 4.282584581632824e-05, "loss": 0.2456, "step": 1826, "task_loss": 0.3118036389350891 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7519512759750281, "compression_loss": 0.0, "distillation_loss": 0.062073417007923126, "epoch": 1.74, "learning_rate": 4.281837348901148e-05, "loss": 0.0686, "step": 1827, "task_loss": 0.12697717547416687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7519731196134382, "compression_loss": 0.0, "distillation_loss": 0.03941449895501137, "epoch": 1.74, "learning_rate": 4.281089792490136e-05, "loss": 0.0391, "step": 1828, "task_loss": 0.036043956875801086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7519949566305574, "compression_loss": 0.0, "distillation_loss": 0.14730878174304962, "epoch": 1.74, "learning_rate": 4.280341912535585e-05, "loss": 0.1414, "step": 1829, "task_loss": 0.08822986483573914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7520167870273895, "compression_loss": 0.0, "distillation_loss": 0.2598916292190552, "epoch": 1.74, "learning_rate": 4.2795937091733515e-05, "loss": 0.2527, "step": 1830, "task_loss": 0.18784040212631226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7520386108049383, "compression_loss": 0.0, "distillation_loss": 0.0485055074095726, "epoch": 1.74, "learning_rate": 4.27884518253935e-05, "loss": 0.0557, "step": 1831, "task_loss": 0.12016290426254272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7520604279642074, "compression_loss": 0.0, "distillation_loss": 0.3198782205581665, "epoch": 1.74, "learning_rate": 4.278096332769555e-05, "loss": 0.3177, "step": 1832, "task_loss": 0.29813024401664734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7520822385062003, "compression_loss": 0.0, "distillation_loss": 0.10080970823764801, "epoch": 1.74, "learning_rate": 4.277347159999997e-05, "loss": 0.1022, "step": 1833, "task_loss": 0.11454527825117111 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752104042431921, "compression_loss": 0.0, "distillation_loss": 0.06464186310768127, "epoch": 1.74, "learning_rate": 4.276597664366767e-05, "loss": 0.0685, "step": 1834, "task_loss": 0.1035253256559372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7521258397423729, "compression_loss": 0.0, "distillation_loss": 0.2522902190685272, "epoch": 1.74, "learning_rate": 4.2758478460060166e-05, "loss": 0.2398, "step": 1835, "task_loss": 0.12788613140583038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7521476304385599, "compression_loss": 0.0, "distillation_loss": 0.15172149240970612, "epoch": 1.74, "learning_rate": 4.275097705053951e-05, "loss": 0.1433, "step": 1836, "task_loss": 0.06708873808383942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7521694145214856, "compression_loss": 0.0, "distillation_loss": 0.08506453782320023, "epoch": 1.74, "learning_rate": 4.2743472416468385e-05, "loss": 0.0814, "step": 1837, "task_loss": 0.048387959599494934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7521911919921536, "compression_loss": 0.0, "distillation_loss": 0.07385560125112534, "epoch": 1.75, "learning_rate": 4.2735964559210054e-05, "loss": 0.0693, "step": 1838, "task_loss": 0.028625313192605972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7522129628515677, "compression_loss": 0.0, "distillation_loss": 0.16923248767852783, "epoch": 1.75, "learning_rate": 4.272845348012833e-05, "loss": 0.1628, "step": 1839, "task_loss": 0.10537364333868027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7522347271007315, "compression_loss": 0.0, "distillation_loss": 0.3296600878238678, "epoch": 1.75, "learning_rate": 4.272093918058766e-05, "loss": 0.3125, "step": 1840, "task_loss": 0.1578628420829773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7522564847406487, "compression_loss": 0.0, "distillation_loss": 0.17389464378356934, "epoch": 1.75, "learning_rate": 4.271342166195304e-05, "loss": 0.173, "step": 1841, "task_loss": 0.1645306646823883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752278235772323, "compression_loss": 0.0, "distillation_loss": 0.08731499314308167, "epoch": 1.75, "learning_rate": 4.2705900925590056e-05, "loss": 0.081, "step": 1842, "task_loss": 0.024491865187883377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7522999801967581, "compression_loss": 0.0, "distillation_loss": 0.13219568133354187, "epoch": 1.75, "learning_rate": 4.269837697286491e-05, "loss": 0.1257, "step": 1843, "task_loss": 0.06739476323127747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7523217180149577, "compression_loss": 0.0, "distillation_loss": 0.06735164672136307, "epoch": 1.75, "learning_rate": 4.269084980514434e-05, "loss": 0.0733, "step": 1844, "task_loss": 0.12667877972126007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7523434492279254, "compression_loss": 0.0, "distillation_loss": 0.1707264930009842, "epoch": 1.75, "learning_rate": 4.268331942379571e-05, "loss": 0.1621, "step": 1845, "task_loss": 0.08454715460538864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752365173836665, "compression_loss": 0.0, "distillation_loss": 0.15691004693508148, "epoch": 1.75, "learning_rate": 4.267578583018694e-05, "loss": 0.1498, "step": 1846, "task_loss": 0.0854022279381752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.75238689184218, "compression_loss": 0.0, "distillation_loss": 0.17727318406105042, "epoch": 1.75, "learning_rate": 4.2668249025686545e-05, "loss": 0.1772, "step": 1847, "task_loss": 0.1768154799938202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7524086032454743, "compression_loss": 0.0, "distillation_loss": 0.14297039806842804, "epoch": 1.75, "learning_rate": 4.2660709011663624e-05, "loss": 0.1431, "step": 1848, "task_loss": 0.14434432983398438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7524303080475514, "compression_loss": 0.0, "distillation_loss": 0.12875759601593018, "epoch": 1.76, "learning_rate": 4.2653165789487864e-05, "loss": 0.128, "step": 1849, "task_loss": 0.12122198939323425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7524520062494151, "compression_loss": 0.0, "distillation_loss": 0.048097141087055206, "epoch": 1.76, "learning_rate": 4.2645619360529514e-05, "loss": 0.046, "step": 1850, "task_loss": 0.02708207257091999 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752473697852069, "compression_loss": 0.0, "distillation_loss": 0.04155917465686798, "epoch": 1.76, "learning_rate": 4.2638069726159424e-05, "loss": 0.0424, "step": 1851, "task_loss": 0.04985608160495758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7524953828565168, "compression_loss": 0.0, "distillation_loss": 0.06510326266288757, "epoch": 1.76, "learning_rate": 4.263051688774902e-05, "loss": 0.0617, "step": 1852, "task_loss": 0.03152014687657356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7525170612637623, "compression_loss": 0.0, "distillation_loss": 0.03942890465259552, "epoch": 1.76, "learning_rate": 4.262296084667032e-05, "loss": 0.0426, "step": 1853, "task_loss": 0.07090801745653152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752538733074809, "compression_loss": 0.0, "distillation_loss": 0.02327614463865757, "epoch": 1.76, "learning_rate": 4.2615401604295905e-05, "loss": 0.0213, "step": 1854, "task_loss": 0.0035562757402658463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7525603982906607, "compression_loss": 0.0, "distillation_loss": 0.20073172450065613, "epoch": 1.76, "learning_rate": 4.260783916199895e-05, "loss": 0.1898, "step": 1855, "task_loss": 0.09144563972949982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752582056912321, "compression_loss": 0.0, "distillation_loss": 0.12031463533639908, "epoch": 1.76, "learning_rate": 4.260027352115321e-05, "loss": 0.1236, "step": 1856, "task_loss": 0.15320178866386414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7526037089407938, "compression_loss": 0.0, "distillation_loss": 0.1642073094844818, "epoch": 1.76, "learning_rate": 4.2592704683133035e-05, "loss": 0.1642, "step": 1857, "task_loss": 0.16391614079475403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7526253543770824, "compression_loss": 0.0, "distillation_loss": 0.041345108300447464, "epoch": 1.76, "learning_rate": 4.258513264931331e-05, "loss": 0.0553, "step": 1858, "task_loss": 0.18059666454792023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7526469932221909, "compression_loss": 0.0, "distillation_loss": 0.41769933700561523, "epoch": 1.77, "learning_rate": 4.257755742106956e-05, "loss": 0.4052, "step": 1859, "task_loss": 0.29252398014068604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7526686254771225, "compression_loss": 0.0, "distillation_loss": 0.32178372144699097, "epoch": 1.77, "learning_rate": 4.256997899977784e-05, "loss": 0.3091, "step": 1860, "task_loss": 0.19510038197040558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7526902511428814, "compression_loss": 0.0, "distillation_loss": 0.13911914825439453, "epoch": 1.77, "learning_rate": 4.2562397386814823e-05, "loss": 0.133, "step": 1861, "task_loss": 0.07751937210559845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7527118702204709, "compression_loss": 0.0, "distillation_loss": 0.04612383246421814, "epoch": 1.77, "learning_rate": 4.255481258355773e-05, "loss": 0.0521, "step": 1862, "task_loss": 0.10636930912733078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7527334827108949, "compression_loss": 0.0, "distillation_loss": 0.1954609900712967, "epoch": 1.77, "learning_rate": 4.254722459138441e-05, "loss": 0.1994, "step": 1863, "task_loss": 0.23512773215770721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752755088615157, "compression_loss": 0.0, "distillation_loss": 0.29851043224334717, "epoch": 1.77, "learning_rate": 4.253963341167321e-05, "loss": 0.2852, "step": 1864, "task_loss": 0.16501733660697937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7527766879342609, "compression_loss": 0.0, "distillation_loss": 0.10969699919223785, "epoch": 1.77, "learning_rate": 4.253203904580314e-05, "loss": 0.1038, "step": 1865, "task_loss": 0.050924863666296005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7527982806692103, "compression_loss": 0.0, "distillation_loss": 0.1796002984046936, "epoch": 1.77, "learning_rate": 4.252444149515374e-05, "loss": 0.177, "step": 1866, "task_loss": 0.15360799431800842 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7528198668210088, "compression_loss": 0.0, "distillation_loss": 0.14553160965442657, "epoch": 1.77, "learning_rate": 4.251684076110514e-05, "loss": 0.1432, "step": 1867, "task_loss": 0.122085340321064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7528414463906601, "compression_loss": 0.0, "distillation_loss": 0.2174375355243683, "epoch": 1.77, "learning_rate": 4.250923684503806e-05, "loss": 0.207, "step": 1868, "task_loss": 0.11338604241609573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7528630193791679, "compression_loss": 0.0, "distillation_loss": 0.29796043038368225, "epoch": 1.77, "learning_rate": 4.2501629748333774e-05, "loss": 0.2902, "step": 1869, "task_loss": 0.22043783962726593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7528845857875359, "compression_loss": 0.0, "distillation_loss": 0.12145286798477173, "epoch": 1.78, "learning_rate": 4.249401947237417e-05, "loss": 0.1337, "step": 1870, "task_loss": 0.24400418996810913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7529061456167679, "compression_loss": 0.0, "distillation_loss": 0.1129135936498642, "epoch": 1.78, "learning_rate": 4.248640601854166e-05, "loss": 0.1107, "step": 1871, "task_loss": 0.09064217656850815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7529276988678674, "compression_loss": 0.0, "distillation_loss": 0.2056802213191986, "epoch": 1.78, "learning_rate": 4.247878938821929e-05, "loss": 0.2075, "step": 1872, "task_loss": 0.22419533133506775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7529492455418382, "compression_loss": 0.0, "distillation_loss": 0.14987275004386902, "epoch": 1.78, "learning_rate": 4.247116958279065e-05, "loss": 0.1442, "step": 1873, "task_loss": 0.09318174421787262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7529707856396838, "compression_loss": 0.0, "distillation_loss": 0.09295307099819183, "epoch": 1.78, "learning_rate": 4.246354660363991e-05, "loss": 0.0863, "step": 1874, "task_loss": 0.026577245444059372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.752992319162408, "compression_loss": 0.0, "distillation_loss": 0.10765902698040009, "epoch": 1.78, "learning_rate": 4.245592045215182e-05, "loss": 0.1006, "step": 1875, "task_loss": 0.03689169883728027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7530138461110146, "compression_loss": 0.0, "distillation_loss": 0.08064229786396027, "epoch": 1.78, "learning_rate": 4.244829112971172e-05, "loss": 0.0793, "step": 1876, "task_loss": 0.06695515662431717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7530353664865072, "compression_loss": 0.0, "distillation_loss": 0.07568307220935822, "epoch": 1.78, "learning_rate": 4.24406586377055e-05, "loss": 0.0758, "step": 1877, "task_loss": 0.07722204923629761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7530568802898894, "compression_loss": 0.0, "distillation_loss": 0.16826829314231873, "epoch": 1.78, "learning_rate": 4.2433022977519645e-05, "loss": 0.1746, "step": 1878, "task_loss": 0.23110389709472656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7530783875221648, "compression_loss": 0.0, "distillation_loss": 0.22350825369358063, "epoch": 1.78, "learning_rate": 4.2425384150541206e-05, "loss": 0.2159, "step": 1879, "task_loss": 0.14728033542633057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7530998881843374, "compression_loss": 0.0, "distillation_loss": 0.13128520548343658, "epoch": 1.79, "learning_rate": 4.2417742158157816e-05, "loss": 0.1253, "step": 1880, "task_loss": 0.07127057015895844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7531213822774107, "compression_loss": 0.0, "distillation_loss": 0.062282584607601166, "epoch": 1.79, "learning_rate": 4.2410097001757676e-05, "loss": 0.0574, "step": 1881, "task_loss": 0.013064134865999222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7531428698023883, "compression_loss": 0.0, "distillation_loss": 0.014240816235542297, "epoch": 1.79, "learning_rate": 4.2402448682729566e-05, "loss": 0.0133, "step": 1882, "task_loss": 0.005121858790516853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.753164350760274, "compression_loss": 0.0, "distillation_loss": 0.08013554662466049, "epoch": 1.79, "learning_rate": 4.2394797202462844e-05, "loss": 0.074, "step": 1883, "task_loss": 0.019022256135940552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7531858251520714, "compression_loss": 0.0, "distillation_loss": 0.3540295362472534, "epoch": 1.79, "learning_rate": 4.238714256234744e-05, "loss": 0.3391, "step": 1884, "task_loss": 0.20519450306892395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7532072929787843, "compression_loss": 0.0, "distillation_loss": 0.21610870957374573, "epoch": 1.79, "learning_rate": 4.237948476377385e-05, "loss": 0.2104, "step": 1885, "task_loss": 0.15874995291233063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7532287542414162, "compression_loss": 0.0, "distillation_loss": 0.12253059446811676, "epoch": 1.79, "learning_rate": 4.237182380813315e-05, "loss": 0.1246, "step": 1886, "task_loss": 0.14349707961082458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.753250208940971, "compression_loss": 0.0, "distillation_loss": 0.02185000665485859, "epoch": 1.79, "learning_rate": 4.236415969681699e-05, "loss": 0.0202, "step": 1887, "task_loss": 0.00525074265897274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7532716570784523, "compression_loss": 0.0, "distillation_loss": 0.1840369701385498, "epoch": 1.79, "learning_rate": 4.23564924312176e-05, "loss": 0.1743, "step": 1888, "task_loss": 0.08696576207876205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7532930986548637, "compression_loss": 0.0, "distillation_loss": 0.23074445128440857, "epoch": 1.79, "learning_rate": 4.2348822012727765e-05, "loss": 0.2274, "step": 1889, "task_loss": 0.19752009212970734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7533145336712089, "compression_loss": 0.0, "distillation_loss": 0.04407363384962082, "epoch": 1.79, "learning_rate": 4.234114844274086e-05, "loss": 0.0408, "step": 1890, "task_loss": 0.01094069704413414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7533359621284917, "compression_loss": 0.0, "distillation_loss": 0.11152929067611694, "epoch": 1.8, "learning_rate": 4.2333471722650826e-05, "loss": 0.1117, "step": 1891, "task_loss": 0.11322666704654694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7533573840277157, "compression_loss": 0.0, "distillation_loss": 0.09612832963466644, "epoch": 1.8, "learning_rate": 4.232579185385217e-05, "loss": 0.0905, "step": 1892, "task_loss": 0.03963814303278923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7533787993698846, "compression_loss": 0.0, "distillation_loss": 0.3889869451522827, "epoch": 1.8, "learning_rate": 4.231810883773999e-05, "loss": 0.3714, "step": 1893, "task_loss": 0.2126784771680832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.753400208156002, "compression_loss": 0.0, "distillation_loss": 0.02580692060291767, "epoch": 1.8, "learning_rate": 4.231042267570993e-05, "loss": 0.0241, "step": 1894, "task_loss": 0.008529577404260635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7534216103870717, "compression_loss": 0.0, "distillation_loss": 0.03107302263379097, "epoch": 1.8, "learning_rate": 4.230273336915822e-05, "loss": 0.0289, "step": 1895, "task_loss": 0.0094615388661623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7534430060640973, "compression_loss": 0.0, "distillation_loss": 0.03097601607441902, "epoch": 1.8, "learning_rate": 4.2295040919481664e-05, "loss": 0.0287, "step": 1896, "task_loss": 0.007949141785502434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7534643951880825, "compression_loss": 0.0, "distillation_loss": 0.05745317414402962, "epoch": 1.8, "learning_rate": 4.228734532807763e-05, "loss": 0.0657, "step": 1897, "task_loss": 0.13991054892539978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.753485777760031, "compression_loss": 0.0, "distillation_loss": 0.028573032468557358, "epoch": 1.8, "learning_rate": 4.2279646596344067e-05, "loss": 0.0357, "step": 1898, "task_loss": 0.09934914112091064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7535071537809466, "compression_loss": 0.0, "distillation_loss": 0.18852916359901428, "epoch": 1.8, "learning_rate": 4.227194472567948e-05, "loss": 0.1943, "step": 1899, "task_loss": 0.24627923965454102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7535285232518327, "compression_loss": 0.0, "distillation_loss": 0.24925187230110168, "epoch": 1.8, "learning_rate": 4.2264239717482945e-05, "loss": 0.2396, "step": 1900, "task_loss": 0.15310557186603546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7535498861736932, "compression_loss": 0.0, "distillation_loss": 0.0920097678899765, "epoch": 1.81, "learning_rate": 4.225653157315412e-05, "loss": 0.0899, "step": 1901, "task_loss": 0.07052158564329147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7535712425475317, "compression_loss": 0.0, "distillation_loss": 0.13238155841827393, "epoch": 1.81, "learning_rate": 4.224882029409323e-05, "loss": 0.1305, "step": 1902, "task_loss": 0.1139896884560585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7535925923743519, "compression_loss": 0.0, "distillation_loss": 0.044050946831703186, "epoch": 1.81, "learning_rate": 4.224110588170106e-05, "loss": 0.0424, "step": 1903, "task_loss": 0.027057217434048653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7536139356551576, "compression_loss": 0.0, "distillation_loss": 0.1628579944372177, "epoch": 1.81, "learning_rate": 4.223338833737898e-05, "loss": 0.154, "step": 1904, "task_loss": 0.07387073338031769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7536352723909523, "compression_loss": 0.0, "distillation_loss": 0.1415494978427887, "epoch": 1.81, "learning_rate": 4.22256676625289e-05, "loss": 0.1376, "step": 1905, "task_loss": 0.10222296416759491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7536566025827397, "compression_loss": 0.0, "distillation_loss": 0.016083184629678726, "epoch": 1.81, "learning_rate": 4.221794385855334e-05, "loss": 0.0149, "step": 1906, "task_loss": 0.003827514126896858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7536779262315236, "compression_loss": 0.0, "distillation_loss": 0.02969953790307045, "epoch": 1.81, "learning_rate": 4.221021692685534e-05, "loss": 0.0271, "step": 1907, "task_loss": 0.0035870037972927094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7536992433383076, "compression_loss": 0.0, "distillation_loss": 0.13226908445358276, "epoch": 1.81, "learning_rate": 4.220248686883857e-05, "loss": 0.1333, "step": 1908, "task_loss": 0.14230315387248993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7537205539040954, "compression_loss": 0.0, "distillation_loss": 0.09493161737918854, "epoch": 1.81, "learning_rate": 4.21947536859072e-05, "loss": 0.0904, "step": 1909, "task_loss": 0.04967789724469185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7537418579298907, "compression_loss": 0.0, "distillation_loss": 0.2932942509651184, "epoch": 1.81, "learning_rate": 4.218701737946601e-05, "loss": 0.2855, "step": 1910, "task_loss": 0.21534579992294312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7537631554166971, "compression_loss": 0.0, "distillation_loss": 0.2947857081890106, "epoch": 1.81, "learning_rate": 4.217927795092034e-05, "loss": 0.2831, "step": 1911, "task_loss": 0.17762699723243713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7537844463655184, "compression_loss": 0.0, "distillation_loss": 0.2603399455547333, "epoch": 1.82, "learning_rate": 4.21715354016761e-05, "loss": 0.2464, "step": 1912, "task_loss": 0.12051122635602951 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7538057307773582, "compression_loss": 0.0, "distillation_loss": 0.19969797134399414, "epoch": 1.82, "learning_rate": 4.216378973313976e-05, "loss": 0.1898, "step": 1913, "task_loss": 0.10046619176864624 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7538270086532203, "compression_loss": 0.0, "distillation_loss": 0.08806608617305756, "epoch": 1.82, "learning_rate": 4.215604094671835e-05, "loss": 0.0839, "step": 1914, "task_loss": 0.04651288688182831 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7538482799941082, "compression_loss": 0.0, "distillation_loss": 0.09663443267345428, "epoch": 1.82, "learning_rate": 4.214828904381947e-05, "loss": 0.1037, "step": 1915, "task_loss": 0.16719813644886017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7538695448010257, "compression_loss": 0.0, "distillation_loss": 0.20862799882888794, "epoch": 1.82, "learning_rate": 4.21405340258513e-05, "loss": 0.2019, "step": 1916, "task_loss": 0.14169104397296906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7538908030749765, "compression_loss": 0.0, "distillation_loss": 0.11301036179065704, "epoch": 1.82, "learning_rate": 4.213277589422258e-05, "loss": 0.117, "step": 1917, "task_loss": 0.1530938744544983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7539120548169642, "compression_loss": 0.0, "distillation_loss": 0.3912833333015442, "epoch": 1.82, "learning_rate": 4.21250146503426e-05, "loss": 0.388, "step": 1918, "task_loss": 0.358115017414093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7539333000279925, "compression_loss": 0.0, "distillation_loss": 0.057676736265420914, "epoch": 1.82, "learning_rate": 4.2117250295621235e-05, "loss": 0.0556, "step": 1919, "task_loss": 0.03697335347533226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7539545387090651, "compression_loss": 0.0, "distillation_loss": 0.08969536423683167, "epoch": 1.82, "learning_rate": 4.210948283146892e-05, "loss": 0.0894, "step": 1920, "task_loss": 0.08631106466054916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7539757708611857, "compression_loss": 0.0, "distillation_loss": 0.17513608932495117, "epoch": 1.82, "learning_rate": 4.210171225929664e-05, "loss": 0.1674, "step": 1921, "task_loss": 0.09771417081356049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.753996996485358, "compression_loss": 0.0, "distillation_loss": 0.1546175479888916, "epoch": 1.83, "learning_rate": 4.209393858051598e-05, "loss": 0.1516, "step": 1922, "task_loss": 0.12429603934288025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7540182155825856, "compression_loss": 0.0, "distillation_loss": 0.28697076439857483, "epoch": 1.83, "learning_rate": 4.208616179653903e-05, "loss": 0.2767, "step": 1923, "task_loss": 0.18397051095962524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7540394281538723, "compression_loss": 0.0, "distillation_loss": 0.04862765967845917, "epoch": 1.83, "learning_rate": 4.207838190877852e-05, "loss": 0.0495, "step": 1924, "task_loss": 0.05719340965151787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7540606342002216, "compression_loss": 0.0, "distillation_loss": 0.2369440346956253, "epoch": 1.83, "learning_rate": 4.2070598918647683e-05, "loss": 0.2375, "step": 1925, "task_loss": 0.2421676218509674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7540818337226374, "compression_loss": 0.0, "distillation_loss": 0.10661203414201736, "epoch": 1.83, "learning_rate": 4.206281282756034e-05, "loss": 0.101, "step": 1926, "task_loss": 0.050882913172245026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7541030267221233, "compression_loss": 0.0, "distillation_loss": 0.09290862083435059, "epoch": 1.83, "learning_rate": 4.205502363693087e-05, "loss": 0.0886, "step": 1927, "task_loss": 0.04993749409914017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7541242131996828, "compression_loss": 0.0, "distillation_loss": 0.13861539959907532, "epoch": 1.83, "learning_rate": 4.204723134817422e-05, "loss": 0.1414, "step": 1928, "task_loss": 0.16668032109737396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7541453931563199, "compression_loss": 0.0, "distillation_loss": 0.1590609848499298, "epoch": 1.83, "learning_rate": 4.2039435962705886e-05, "loss": 0.1669, "step": 1929, "task_loss": 0.23708070814609528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.754166566593038, "compression_loss": 0.0, "distillation_loss": 0.035407066345214844, "epoch": 1.83, "learning_rate": 4.2031637481941954e-05, "loss": 0.0332, "step": 1930, "task_loss": 0.013283960521221161 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.754187733510841, "compression_loss": 0.0, "distillation_loss": 0.1811191439628601, "epoch": 1.83, "learning_rate": 4.202383590729905e-05, "loss": 0.1727, "step": 1931, "task_loss": 0.09702938795089722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7542088939107325, "compression_loss": 0.0, "distillation_loss": 0.10961516201496124, "epoch": 1.83, "learning_rate": 4.201603124019436e-05, "loss": 0.1087, "step": 1932, "task_loss": 0.10052899271249771 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.754230047793716, "compression_loss": 0.0, "distillation_loss": 0.249653160572052, "epoch": 1.84, "learning_rate": 4.200822348204565e-05, "loss": 0.2377, "step": 1933, "task_loss": 0.12971001863479614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7542511951607956, "compression_loss": 0.0, "distillation_loss": 0.12614881992340088, "epoch": 1.84, "learning_rate": 4.200041263427123e-05, "loss": 0.1307, "step": 1934, "task_loss": 0.17151018977165222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7542723360129746, "compression_loss": 0.0, "distillation_loss": 0.18562328815460205, "epoch": 1.84, "learning_rate": 4.199259869828998e-05, "loss": 0.1845, "step": 1935, "task_loss": 0.1747492104768753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7542934703512568, "compression_loss": 0.0, "distillation_loss": 0.07814265787601471, "epoch": 1.84, "learning_rate": 4.1984781675521345e-05, "loss": 0.0725, "step": 1936, "task_loss": 0.021998286247253418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7543145981766459, "compression_loss": 0.0, "distillation_loss": 0.06505021452903748, "epoch": 1.84, "learning_rate": 4.1976961567385306e-05, "loss": 0.0609, "step": 1937, "task_loss": 0.02330428548157215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7543357194901457, "compression_loss": 0.0, "distillation_loss": 0.043614309281110764, "epoch": 1.84, "learning_rate": 4.1969138375302445e-05, "loss": 0.0402, "step": 1938, "task_loss": 0.009535277262330055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7543568342927597, "compression_loss": 0.0, "distillation_loss": 0.12613525986671448, "epoch": 1.84, "learning_rate": 4.1961312100693874e-05, "loss": 0.1151, "step": 1939, "task_loss": 0.016281738877296448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7543779425854915, "compression_loss": 0.0, "distillation_loss": 0.23481710255146027, "epoch": 1.84, "learning_rate": 4.1953482744981274e-05, "loss": 0.2348, "step": 1940, "task_loss": 0.23426470160484314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7543990443693451, "compression_loss": 0.0, "distillation_loss": 0.17542167007923126, "epoch": 1.84, "learning_rate": 4.194565030958688e-05, "loss": 0.1797, "step": 1941, "task_loss": 0.2181302309036255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.754420139645324, "compression_loss": 0.0, "distillation_loss": 0.04640250653028488, "epoch": 1.84, "learning_rate": 4.19378147959335e-05, "loss": 0.0431, "step": 1942, "task_loss": 0.013633305206894875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7544412284144317, "compression_loss": 0.0, "distillation_loss": 0.09803412854671478, "epoch": 1.85, "learning_rate": 4.192997620544449e-05, "loss": 0.099, "step": 1943, "task_loss": 0.10792693495750427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7544623106776723, "compression_loss": 0.0, "distillation_loss": 0.1254318654537201, "epoch": 1.85, "learning_rate": 4.192213453954377e-05, "loss": 0.1264, "step": 1944, "task_loss": 0.13558337092399597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7544833864360492, "compression_loss": 0.0, "distillation_loss": 0.028305238112807274, "epoch": 1.85, "learning_rate": 4.19142897996558e-05, "loss": 0.026, "step": 1945, "task_loss": 0.005378095433115959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7545044556905661, "compression_loss": 0.0, "distillation_loss": 0.08470214158296585, "epoch": 1.85, "learning_rate": 4.190644198720563e-05, "loss": 0.0819, "step": 1946, "task_loss": 0.05680760368704796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7545255184422267, "compression_loss": 0.0, "distillation_loss": 0.2368120402097702, "epoch": 1.85, "learning_rate": 4.189859110361886e-05, "loss": 0.231, "step": 1947, "task_loss": 0.17879217863082886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7545465746920347, "compression_loss": 0.0, "distillation_loss": 0.12444418668746948, "epoch": 1.85, "learning_rate": 4.189073715032163e-05, "loss": 0.1211, "step": 1948, "task_loss": 0.0914890468120575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7545676244409938, "compression_loss": 0.0, "distillation_loss": 0.1032329723238945, "epoch": 1.85, "learning_rate": 4.188288012874065e-05, "loss": 0.1, "step": 1949, "task_loss": 0.07098895311355591 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7545886676901077, "compression_loss": 0.0, "distillation_loss": 0.6745878458023071, "epoch": 1.85, "learning_rate": 4.187502004030318e-05, "loss": 0.6498, "step": 1950, "task_loss": 0.4266480803489685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.75460970444038, "compression_loss": 0.0, "distillation_loss": 0.2606114149093628, "epoch": 1.85, "learning_rate": 4.186715688643705e-05, "loss": 0.2448, "step": 1951, "task_loss": 0.10222074389457703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7546307346928145, "compression_loss": 0.0, "distillation_loss": 0.07269393652677536, "epoch": 1.85, "learning_rate": 4.185929066857064e-05, "loss": 0.0787, "step": 1952, "task_loss": 0.1328985095024109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7546517584484147, "compression_loss": 0.0, "distillation_loss": 0.03857032582163811, "epoch": 1.85, "learning_rate": 4.1851421388132886e-05, "loss": 0.0352, "step": 1953, "task_loss": 0.004427826032042503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7546727757081845, "compression_loss": 0.0, "distillation_loss": 0.025319568812847137, "epoch": 1.86, "learning_rate": 4.1843549046553284e-05, "loss": 0.0232, "step": 1954, "task_loss": 0.004016607999801636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7546937864731275, "compression_loss": 0.0, "distillation_loss": 0.14767573773860931, "epoch": 1.86, "learning_rate": 4.183567364526186e-05, "loss": 0.1589, "step": 1955, "task_loss": 0.2602040767669678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7547147907442472, "compression_loss": 0.0, "distillation_loss": 0.19196276366710663, "epoch": 1.86, "learning_rate": 4.182779518568926e-05, "loss": 0.1933, "step": 1956, "task_loss": 0.2052653729915619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7547357885225475, "compression_loss": 0.0, "distillation_loss": 0.168940469622612, "epoch": 1.86, "learning_rate": 4.181991366926661e-05, "loss": 0.1697, "step": 1957, "task_loss": 0.1767549216747284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7547567798090321, "compression_loss": 0.0, "distillation_loss": 0.06519413739442825, "epoch": 1.86, "learning_rate": 4.181202909742564e-05, "loss": 0.0633, "step": 1958, "task_loss": 0.04639093205332756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7547777646047046, "compression_loss": 0.0, "distillation_loss": 0.23005223274230957, "epoch": 1.86, "learning_rate": 4.1804141471598604e-05, "loss": 0.22, "step": 1959, "task_loss": 0.1295807808637619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7547987429105686, "compression_loss": 0.0, "distillation_loss": 0.15140117704868317, "epoch": 1.86, "learning_rate": 4.179625079321836e-05, "loss": 0.1443, "step": 1960, "task_loss": 0.08072268962860107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.754819714727628, "compression_loss": 0.0, "distillation_loss": 0.15924590826034546, "epoch": 1.86, "learning_rate": 4.1788357063718254e-05, "loss": 0.1564, "step": 1961, "task_loss": 0.1309977024793625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7548406800568862, "compression_loss": 0.0, "distillation_loss": 0.09538164734840393, "epoch": 1.86, "learning_rate": 4.178046028453224e-05, "loss": 0.091, "step": 1962, "task_loss": 0.05167919024825096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7548616388993471, "compression_loss": 0.0, "distillation_loss": 0.12580883502960205, "epoch": 1.86, "learning_rate": 4.1772560457094795e-05, "loss": 0.123, "step": 1963, "task_loss": 0.09781771153211594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7548825912560143, "compression_loss": 0.0, "distillation_loss": 0.06123030185699463, "epoch": 1.87, "learning_rate": 4.1764657582840965e-05, "loss": 0.0621, "step": 1964, "task_loss": 0.069994255900383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7549035371278915, "compression_loss": 0.0, "distillation_loss": 0.09012752026319504, "epoch": 1.87, "learning_rate": 4.175675166320635e-05, "loss": 0.0856, "step": 1965, "task_loss": 0.045101843774318695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7549244765159824, "compression_loss": 0.0, "distillation_loss": 0.18190321326255798, "epoch": 1.87, "learning_rate": 4.1748842699627094e-05, "loss": 0.178, "step": 1966, "task_loss": 0.14312417805194855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7549454094212906, "compression_loss": 0.0, "distillation_loss": 0.2552461326122284, "epoch": 1.87, "learning_rate": 4.17409306935399e-05, "loss": 0.2553, "step": 1967, "task_loss": 0.25603681802749634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.75496633584482, "compression_loss": 0.0, "distillation_loss": 0.05597781017422676, "epoch": 1.87, "learning_rate": 4.173301564638201e-05, "loss": 0.0518, "step": 1968, "task_loss": 0.014608925208449364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.754987255787574, "compression_loss": 0.0, "distillation_loss": 0.24270398914813995, "epoch": 1.87, "learning_rate": 4.1725097559591256e-05, "loss": 0.2384, "step": 1969, "task_loss": 0.19955337047576904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7550081692505564, "compression_loss": 0.0, "distillation_loss": 0.07123062014579773, "epoch": 1.87, "learning_rate": 4.1717176434605967e-05, "loss": 0.0911, "step": 1970, "task_loss": 0.2696712017059326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7550290762347709, "compression_loss": 0.0, "distillation_loss": 0.07343777269124985, "epoch": 1.87, "learning_rate": 4.170925227286508e-05, "loss": 0.0777, "step": 1971, "task_loss": 0.11573462188243866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7550499767412212, "compression_loss": 0.0, "distillation_loss": 0.45374932885169983, "epoch": 1.87, "learning_rate": 4.170132507580803e-05, "loss": 0.4348, "step": 1972, "task_loss": 0.26441484689712524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.755070870770911, "compression_loss": 0.0, "distillation_loss": 0.07927072793245316, "epoch": 1.87, "learning_rate": 4.1693394844874856e-05, "loss": 0.0938, "step": 1973, "task_loss": 0.22447620332241058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7550917583248438, "compression_loss": 0.0, "distillation_loss": 0.10215029865503311, "epoch": 1.87, "learning_rate": 4.1685461581506115e-05, "loss": 0.1084, "step": 1974, "task_loss": 0.16435641050338745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7551126394040235, "compression_loss": 0.0, "distillation_loss": 0.1036095917224884, "epoch": 1.88, "learning_rate": 4.167752528714291e-05, "loss": 0.096, "step": 1975, "task_loss": 0.027509452775120735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7551335140094537, "compression_loss": 0.0, "distillation_loss": 0.19809332489967346, "epoch": 1.88, "learning_rate": 4.166958596322692e-05, "loss": 0.1908, "step": 1976, "task_loss": 0.1248994842171669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.755154382142138, "compression_loss": 0.0, "distillation_loss": 0.10628242045640945, "epoch": 1.88, "learning_rate": 4.1661643611200366e-05, "loss": 0.1052, "step": 1977, "task_loss": 0.09579187631607056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7551752438030803, "compression_loss": 0.0, "distillation_loss": 0.3265897333621979, "epoch": 1.88, "learning_rate": 4.1653698232506e-05, "loss": 0.311, "step": 1978, "task_loss": 0.17026562988758087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7551960989932841, "compression_loss": 0.0, "distillation_loss": 0.44924837350845337, "epoch": 1.88, "learning_rate": 4.1645749828587145e-05, "loss": 0.4407, "step": 1979, "task_loss": 0.36339446902275085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7552169477137531, "compression_loss": 0.0, "distillation_loss": 0.2017005980014801, "epoch": 1.88, "learning_rate": 4.1637798400887674e-05, "loss": 0.1914, "step": 1980, "task_loss": 0.09869912266731262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7552377899654911, "compression_loss": 0.0, "distillation_loss": 0.24423138797283173, "epoch": 1.88, "learning_rate": 4.162984395085198e-05, "loss": 0.2326, "step": 1981, "task_loss": 0.12757530808448792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7552586257495015, "compression_loss": 0.0, "distillation_loss": 0.06164749711751938, "epoch": 1.88, "learning_rate": 4.162188647992506e-05, "loss": 0.0573, "step": 1982, "task_loss": 0.018155789002776146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7552794550667884, "compression_loss": 0.0, "distillation_loss": 0.3318878710269928, "epoch": 1.88, "learning_rate": 4.161392598955239e-05, "loss": 0.3315, "step": 1983, "task_loss": 0.328228622674942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7553002779183551, "compression_loss": 0.0, "distillation_loss": 0.13821963965892792, "epoch": 1.88, "learning_rate": 4.160596248118007e-05, "loss": 0.1504, "step": 1984, "task_loss": 0.25973179936408997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7553210943052054, "compression_loss": 0.0, "distillation_loss": 0.11968272924423218, "epoch": 1.89, "learning_rate": 4.159799595625468e-05, "loss": 0.1284, "step": 1985, "task_loss": 0.20697957277297974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7553419042283431, "compression_loss": 0.0, "distillation_loss": 0.14230716228485107, "epoch": 1.89, "learning_rate": 4.159002641622338e-05, "loss": 0.1497, "step": 1986, "task_loss": 0.21655035018920898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7553627076887719, "compression_loss": 0.0, "distillation_loss": 0.053881511092185974, "epoch": 1.89, "learning_rate": 4.1582053862533895e-05, "loss": 0.0549, "step": 1987, "task_loss": 0.06384479999542236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7553835046874953, "compression_loss": 0.0, "distillation_loss": 0.16238471865653992, "epoch": 1.89, "learning_rate": 4.157407829663446e-05, "loss": 0.1679, "step": 1988, "task_loss": 0.21752187609672546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7554042952255171, "compression_loss": 0.0, "distillation_loss": 0.08790767192840576, "epoch": 1.89, "learning_rate": 4.1566099719973884e-05, "loss": 0.084, "step": 1989, "task_loss": 0.04870061203837395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7554250793038408, "compression_loss": 0.0, "distillation_loss": 0.12433084100484848, "epoch": 1.89, "learning_rate": 4.1558118134001514e-05, "loss": 0.1188, "step": 1990, "task_loss": 0.06881560385227203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7554458569234704, "compression_loss": 0.0, "distillation_loss": 0.19472339749336243, "epoch": 1.89, "learning_rate": 4.155013354016723e-05, "loss": 0.1847, "step": 1991, "task_loss": 0.0943620428442955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7554666280854094, "compression_loss": 0.0, "distillation_loss": 0.12036952376365662, "epoch": 1.89, "learning_rate": 4.154214593992149e-05, "loss": 0.1135, "step": 1992, "task_loss": 0.05154300481081009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7554873927906615, "compression_loss": 0.0, "distillation_loss": 0.16188105940818787, "epoch": 1.89, "learning_rate": 4.1534155334715264e-05, "loss": 0.1657, "step": 1993, "task_loss": 0.20030921697616577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7555081510402303, "compression_loss": 0.0, "distillation_loss": 0.03598017245531082, "epoch": 1.89, "learning_rate": 4.15261617260001e-05, "loss": 0.0422, "step": 1994, "task_loss": 0.09828056395053864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7555289028351195, "compression_loss": 0.0, "distillation_loss": 0.07038508355617523, "epoch": 1.89, "learning_rate": 4.151816511522807e-05, "loss": 0.0708, "step": 1995, "task_loss": 0.07408405095338821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.755549648176333, "compression_loss": 0.0, "distillation_loss": 0.0785357877612114, "epoch": 1.9, "learning_rate": 4.151016550385179e-05, "loss": 0.0895, "step": 1996, "task_loss": 0.18811197578907013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7555703870648742, "compression_loss": 0.0, "distillation_loss": 0.06988924741744995, "epoch": 1.9, "learning_rate": 4.150216289332443e-05, "loss": 0.0647, "step": 1997, "task_loss": 0.018136270344257355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7555911195017471, "compression_loss": 0.0, "distillation_loss": 0.11623725295066833, "epoch": 1.9, "learning_rate": 4.149415728509971e-05, "loss": 0.1145, "step": 1998, "task_loss": 0.09836913645267487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.755611845487955, "compression_loss": 0.0, "distillation_loss": 0.0843459814786911, "epoch": 1.9, "learning_rate": 4.1486148680631875e-05, "loss": 0.0788, "step": 1999, "task_loss": 0.028761208057403564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7556325650245018, "compression_loss": 0.0, "distillation_loss": 0.16870735585689545, "epoch": 1.9, "learning_rate": 4.147813708137574e-05, "loss": 0.1591, "step": 2000, "task_loss": 0.07283841073513031 }, { "epoch": 1.9, "eval_accuracy": 0.9013761467889908, "eval_loss": 0.3940832018852234, "eval_runtime": 18.3803, "eval_samples_per_second": 47.442, "eval_steps_per_second": 5.93, "step": 2000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7556532781123911, "compression_loss": 0.0, "distillation_loss": 0.2963669002056122, "epoch": 1.9, "learning_rate": 4.1470122488786645e-05, "loss": 0.2864, "step": 2001, "task_loss": 0.1968749761581421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7556739847526267, "compression_loss": 0.0, "distillation_loss": 0.05983327701687813, "epoch": 1.9, "learning_rate": 4.146210490432048e-05, "loss": 0.0605, "step": 2002, "task_loss": 0.06697780638933182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7556946849462123, "compression_loss": 0.0, "distillation_loss": 0.07671034336090088, "epoch": 1.9, "learning_rate": 4.1454084329433674e-05, "loss": 0.0927, "step": 2003, "task_loss": 0.23632298409938812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7557153786941514, "compression_loss": 0.0, "distillation_loss": 0.14522545039653778, "epoch": 1.9, "learning_rate": 4.144606076558321e-05, "loss": 0.1386, "step": 2004, "task_loss": 0.07935189455747604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7557360659974478, "compression_loss": 0.0, "distillation_loss": 0.25535622239112854, "epoch": 1.9, "learning_rate": 4.14380342142266e-05, "loss": 0.2454, "step": 2005, "task_loss": 0.1559627205133438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7557567468571051, "compression_loss": 0.0, "distillation_loss": 0.09220820665359497, "epoch": 1.91, "learning_rate": 4.14300046768219e-05, "loss": 0.0889, "step": 2006, "task_loss": 0.05925717204809189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7557774212741272, "compression_loss": 0.0, "distillation_loss": 0.21450912952423096, "epoch": 1.91, "learning_rate": 4.1421972154827724e-05, "loss": 0.2048, "step": 2007, "task_loss": 0.11772306263446808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7557980892495175, "compression_loss": 0.0, "distillation_loss": 0.16318204998970032, "epoch": 1.91, "learning_rate": 4.141393664970323e-05, "loss": 0.1539, "step": 2008, "task_loss": 0.0701281875371933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7558187507842798, "compression_loss": 0.0, "distillation_loss": 0.0389268696308136, "epoch": 1.91, "learning_rate": 4.140589816290808e-05, "loss": 0.0364, "step": 2009, "task_loss": 0.013847017660737038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7558394058794179, "compression_loss": 0.0, "distillation_loss": 0.047759756445884705, "epoch": 1.91, "learning_rate": 4.1397856695902535e-05, "loss": 0.085, "step": 2010, "task_loss": 0.4196968376636505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7558600545359353, "compression_loss": 0.0, "distillation_loss": 0.0479925200343132, "epoch": 1.91, "learning_rate": 4.138981225014733e-05, "loss": 0.0445, "step": 2011, "task_loss": 0.012897208333015442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7558806967548358, "compression_loss": 0.0, "distillation_loss": 0.05814354866743088, "epoch": 1.91, "learning_rate": 4.1381764827103806e-05, "loss": 0.0544, "step": 2012, "task_loss": 0.0203425120562315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.755901332537123, "compression_loss": 0.0, "distillation_loss": 0.09011336416006088, "epoch": 1.91, "learning_rate": 4.13737144282338e-05, "loss": 0.0902, "step": 2013, "task_loss": 0.09071313589811325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7559219618838006, "compression_loss": 0.0, "distillation_loss": 0.055356744676828384, "epoch": 1.91, "learning_rate": 4.1365661054999715e-05, "loss": 0.0554, "step": 2014, "task_loss": 0.05614471435546875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7559425847958723, "compression_loss": 0.0, "distillation_loss": 0.1986648440361023, "epoch": 1.91, "learning_rate": 4.1357604708864475e-05, "loss": 0.2015, "step": 2015, "task_loss": 0.22709903120994568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7559632012743418, "compression_loss": 0.0, "distillation_loss": 0.1105402335524559, "epoch": 1.91, "learning_rate": 4.1349545391291563e-05, "loss": 0.1051, "step": 2016, "task_loss": 0.05623643100261688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7559838113202129, "compression_loss": 0.0, "distillation_loss": 0.12236940860748291, "epoch": 1.92, "learning_rate": 4.1341483103745006e-05, "loss": 0.1174, "step": 2017, "task_loss": 0.07233867049217224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7560044149344889, "compression_loss": 0.0, "distillation_loss": 0.17869043350219727, "epoch": 1.92, "learning_rate": 4.133341784768933e-05, "loss": 0.1765, "step": 2018, "task_loss": 0.156291663646698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7560250121181739, "compression_loss": 0.0, "distillation_loss": 0.28349682688713074, "epoch": 1.92, "learning_rate": 4.132534962458962e-05, "loss": 0.2765, "step": 2019, "task_loss": 0.21306130290031433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7560456028722713, "compression_loss": 0.0, "distillation_loss": 0.10640447586774826, "epoch": 1.92, "learning_rate": 4.131727843591155e-05, "loss": 0.1149, "step": 2020, "task_loss": 0.1911725401878357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.756066187197785, "compression_loss": 0.0, "distillation_loss": 0.13865335285663605, "epoch": 1.92, "learning_rate": 4.130920428312127e-05, "loss": 0.1432, "step": 2021, "task_loss": 0.18457633256912231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7560867650957186, "compression_loss": 0.0, "distillation_loss": 0.06674148142337799, "epoch": 1.92, "learning_rate": 4.130112716768548e-05, "loss": 0.081, "step": 2022, "task_loss": 0.2092704176902771 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7561073365670756, "compression_loss": 0.0, "distillation_loss": 0.03668634220957756, "epoch": 1.92, "learning_rate": 4.129304709107143e-05, "loss": 0.0345, "step": 2023, "task_loss": 0.014327209442853928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7561279016128599, "compression_loss": 0.0, "distillation_loss": 0.08399326354265213, "epoch": 1.92, "learning_rate": 4.128496405474691e-05, "loss": 0.0867, "step": 2024, "task_loss": 0.11154383420944214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7561484602340752, "compression_loss": 0.0, "distillation_loss": 0.16036614775657654, "epoch": 1.92, "learning_rate": 4.127687806018024e-05, "loss": 0.1671, "step": 2025, "task_loss": 0.22741574048995972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7561690124317251, "compression_loss": 0.0, "distillation_loss": 0.02608412131667137, "epoch": 1.92, "learning_rate": 4.1268789108840275e-05, "loss": 0.0239, "step": 2026, "task_loss": 0.004486914724111557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7561895582068132, "compression_loss": 0.0, "distillation_loss": 0.04416799545288086, "epoch": 1.92, "learning_rate": 4.126069720219642e-05, "loss": 0.0411, "step": 2027, "task_loss": 0.013907143846154213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7562100975603433, "compression_loss": 0.0, "distillation_loss": 0.24007698893547058, "epoch": 1.93, "learning_rate": 4.125260234171861e-05, "loss": 0.2428, "step": 2028, "task_loss": 0.2669823169708252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7562306304933191, "compression_loss": 0.0, "distillation_loss": 0.19312305748462677, "epoch": 1.93, "learning_rate": 4.12445045288773e-05, "loss": 0.1849, "step": 2029, "task_loss": 0.11117222160100937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7562511570067442, "compression_loss": 0.0, "distillation_loss": 0.3301912546157837, "epoch": 1.93, "learning_rate": 4.123640376514353e-05, "loss": 0.316, "step": 2030, "task_loss": 0.18828773498535156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7562716771016224, "compression_loss": 0.0, "distillation_loss": 0.022117741405963898, "epoch": 1.93, "learning_rate": 4.12283000519888e-05, "loss": 0.0202, "step": 2031, "task_loss": 0.002740517258644104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7562921907789572, "compression_loss": 0.0, "distillation_loss": 0.2477855086326599, "epoch": 1.93, "learning_rate": 4.122019339088522e-05, "loss": 0.2442, "step": 2032, "task_loss": 0.2123817652463913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7563126980397523, "compression_loss": 0.0, "distillation_loss": 0.06262822449207306, "epoch": 1.93, "learning_rate": 4.121208378330539e-05, "loss": 0.0674, "step": 2033, "task_loss": 0.11077867448329926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7563331988850116, "compression_loss": 0.0, "distillation_loss": 0.12019616365432739, "epoch": 1.93, "learning_rate": 4.120397123072246e-05, "loss": 0.1144, "step": 2034, "task_loss": 0.06211775913834572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7563536933157385, "compression_loss": 0.0, "distillation_loss": 0.09517644345760345, "epoch": 1.93, "learning_rate": 4.119585573461012e-05, "loss": 0.087, "step": 2035, "task_loss": 0.013532513752579689 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.756374181332937, "compression_loss": 0.0, "distillation_loss": 0.1831834316253662, "epoch": 1.93, "learning_rate": 4.118773729644258e-05, "loss": 0.1754, "step": 2036, "task_loss": 0.10511328279972076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7563946629376105, "compression_loss": 0.0, "distillation_loss": 0.17582698166370392, "epoch": 1.93, "learning_rate": 4.11796159176946e-05, "loss": 0.1714, "step": 2037, "task_loss": 0.13160312175750732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7564151381307629, "compression_loss": 0.0, "distillation_loss": 0.19995486736297607, "epoch": 1.94, "learning_rate": 4.117149159984147e-05, "loss": 0.1948, "step": 2038, "task_loss": 0.14824716746807098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7564356069133976, "compression_loss": 0.0, "distillation_loss": 0.33650684356689453, "epoch": 1.94, "learning_rate": 4.116336434435901e-05, "loss": 0.3217, "step": 2039, "task_loss": 0.1889341026544571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7564560692865185, "compression_loss": 0.0, "distillation_loss": 0.22618404030799866, "epoch": 1.94, "learning_rate": 4.115523415272358e-05, "loss": 0.2265, "step": 2040, "task_loss": 0.2290044128894806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7564765252511294, "compression_loss": 0.0, "distillation_loss": 0.08555667102336884, "epoch": 1.94, "learning_rate": 4.1147101026412046e-05, "loss": 0.0885, "step": 2041, "task_loss": 0.11483641713857651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7564969748082336, "compression_loss": 0.0, "distillation_loss": 0.17309504747390747, "epoch": 1.94, "learning_rate": 4.1138964966901853e-05, "loss": 0.1651, "step": 2042, "task_loss": 0.09353820979595184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7565174179588352, "compression_loss": 0.0, "distillation_loss": 0.10289028286933899, "epoch": 1.94, "learning_rate": 4.113082597567095e-05, "loss": 0.1063, "step": 2043, "task_loss": 0.1365654170513153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7565378547039375, "compression_loss": 0.0, "distillation_loss": 0.03402677923440933, "epoch": 1.94, "learning_rate": 4.112268405419782e-05, "loss": 0.0313, "step": 2044, "task_loss": 0.006699586287140846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7565582850445445, "compression_loss": 0.0, "distillation_loss": 0.26117533445358276, "epoch": 1.94, "learning_rate": 4.1114539203961476e-05, "loss": 0.2482, "step": 2045, "task_loss": 0.13159014284610748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7565787089816597, "compression_loss": 0.0, "distillation_loss": 0.1347775161266327, "epoch": 1.94, "learning_rate": 4.110639142644149e-05, "loss": 0.1373, "step": 2046, "task_loss": 0.16003359854221344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7565991265162868, "compression_loss": 0.0, "distillation_loss": 0.2649126648902893, "epoch": 1.94, "learning_rate": 4.109824072311792e-05, "loss": 0.2607, "step": 2047, "task_loss": 0.2226344645023346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7566195376494295, "compression_loss": 0.0, "distillation_loss": 0.21198835968971252, "epoch": 1.94, "learning_rate": 4.10900870954714e-05, "loss": 0.2042, "step": 2048, "task_loss": 0.13424277305603027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7566399423820915, "compression_loss": 0.0, "distillation_loss": 0.16852621734142303, "epoch": 1.95, "learning_rate": 4.108193054498307e-05, "loss": 0.164, "step": 2049, "task_loss": 0.1233605444431305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7566603407152765, "compression_loss": 0.0, "distillation_loss": 0.08903372287750244, "epoch": 1.95, "learning_rate": 4.10737710731346e-05, "loss": 0.0815, "step": 2050, "task_loss": 0.013720404356718063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7566807326499883, "compression_loss": 0.0, "distillation_loss": 0.1362341046333313, "epoch": 1.95, "learning_rate": 4.106560868140821e-05, "loss": 0.1378, "step": 2051, "task_loss": 0.15231622755527496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7567011181872302, "compression_loss": 0.0, "distillation_loss": 0.12862615287303925, "epoch": 1.95, "learning_rate": 4.105744337128662e-05, "loss": 0.1334, "step": 2052, "task_loss": 0.17673933506011963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7567214973280062, "compression_loss": 0.0, "distillation_loss": 0.12414275854825974, "epoch": 1.95, "learning_rate": 4.104927514425312e-05, "loss": 0.1193, "step": 2053, "task_loss": 0.07598595321178436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7567418700733199, "compression_loss": 0.0, "distillation_loss": 0.08933214098215103, "epoch": 1.95, "learning_rate": 4.104110400179148e-05, "loss": 0.0928, "step": 2054, "task_loss": 0.12388118356466293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7567622364241751, "compression_loss": 0.0, "distillation_loss": 0.14208053052425385, "epoch": 1.95, "learning_rate": 4.103292994538605e-05, "loss": 0.1349, "step": 2055, "task_loss": 0.07043315470218658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7567825963815752, "compression_loss": 0.0, "distillation_loss": 0.26003506779670715, "epoch": 1.95, "learning_rate": 4.102475297652168e-05, "loss": 0.2527, "step": 2056, "task_loss": 0.18638324737548828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7568029499465241, "compression_loss": 0.0, "distillation_loss": 0.13777679204940796, "epoch": 1.95, "learning_rate": 4.1016573096683765e-05, "loss": 0.1306, "step": 2057, "task_loss": 0.06621996313333511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7568232971200255, "compression_loss": 0.0, "distillation_loss": 0.05839243531227112, "epoch": 1.95, "learning_rate": 4.10083903073582e-05, "loss": 0.053, "step": 2058, "task_loss": 0.004214171320199966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.756843637903083, "compression_loss": 0.0, "distillation_loss": 0.3341011106967926, "epoch": 1.96, "learning_rate": 4.1000204610031447e-05, "loss": 0.3197, "step": 2059, "task_loss": 0.18979643285274506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7568639722967002, "compression_loss": 0.0, "distillation_loss": 0.06705968081951141, "epoch": 1.96, "learning_rate": 4.0992016006190456e-05, "loss": 0.0707, "step": 2060, "task_loss": 0.10322168469429016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7568843003018809, "compression_loss": 0.0, "distillation_loss": 0.047776952385902405, "epoch": 1.96, "learning_rate": 4.0983824497322755e-05, "loss": 0.0474, "step": 2061, "task_loss": 0.04416225105524063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7569046219196288, "compression_loss": 0.0, "distillation_loss": 0.10682905465364456, "epoch": 1.96, "learning_rate": 4.0975630084916344e-05, "loss": 0.1004, "step": 2062, "task_loss": 0.042584970593452454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7569249371509476, "compression_loss": 0.0, "distillation_loss": 0.14277291297912598, "epoch": 1.96, "learning_rate": 4.096743277045979e-05, "loss": 0.1367, "step": 2063, "task_loss": 0.08212710171937943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7569452459968409, "compression_loss": 0.0, "distillation_loss": 0.20160989463329315, "epoch": 1.96, "learning_rate": 4.0959232555442174e-05, "loss": 0.194, "step": 2064, "task_loss": 0.12543994188308716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7569655484583123, "compression_loss": 0.0, "distillation_loss": 0.12779472768306732, "epoch": 1.96, "learning_rate": 4.0951029441353104e-05, "loss": 0.1189, "step": 2065, "task_loss": 0.03901362046599388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7569858445363656, "compression_loss": 0.0, "distillation_loss": 0.19314239919185638, "epoch": 1.96, "learning_rate": 4.094282342968271e-05, "loss": 0.1827, "step": 2066, "task_loss": 0.08886859565973282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7570061342320046, "compression_loss": 0.0, "distillation_loss": 0.13019533455371857, "epoch": 1.96, "learning_rate": 4.093461452192167e-05, "loss": 0.1232, "step": 2067, "task_loss": 0.05998740345239639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7570264175462328, "compression_loss": 0.0, "distillation_loss": 0.21118499338626862, "epoch": 1.96, "learning_rate": 4.092640271956115e-05, "loss": 0.2132, "step": 2068, "task_loss": 0.23096507787704468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7570466944800539, "compression_loss": 0.0, "distillation_loss": 0.09960189461708069, "epoch": 1.96, "learning_rate": 4.091818802409288e-05, "loss": 0.0991, "step": 2069, "task_loss": 0.094699427485466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7570669650344717, "compression_loss": 0.0, "distillation_loss": 0.09009353071451187, "epoch": 1.97, "learning_rate": 4.0909970437009096e-05, "loss": 0.0879, "step": 2070, "task_loss": 0.06863107532262802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7570872292104897, "compression_loss": 0.0, "distillation_loss": 0.05240606516599655, "epoch": 1.97, "learning_rate": 4.0901749959802546e-05, "loss": 0.0545, "step": 2071, "task_loss": 0.07330939918756485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7571074870091117, "compression_loss": 0.0, "distillation_loss": 0.32454681396484375, "epoch": 1.97, "learning_rate": 4.0893526593966535e-05, "loss": 0.3176, "step": 2072, "task_loss": 0.25487715005874634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7571277384313414, "compression_loss": 0.0, "distillation_loss": 0.1994623839855194, "epoch": 1.97, "learning_rate": 4.088530034099488e-05, "loss": 0.1925, "step": 2073, "task_loss": 0.13009962439537048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7571479834781824, "compression_loss": 0.0, "distillation_loss": 0.043959006667137146, "epoch": 1.97, "learning_rate": 4.087707120238191e-05, "loss": 0.0632, "step": 2074, "task_loss": 0.2364317774772644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7571682221506384, "compression_loss": 0.0, "distillation_loss": 0.1748720109462738, "epoch": 1.97, "learning_rate": 4.0868839179622495e-05, "loss": 0.1739, "step": 2075, "task_loss": 0.16508643329143524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7571884544497132, "compression_loss": 0.0, "distillation_loss": 0.09879680722951889, "epoch": 1.97, "learning_rate": 4.086060427421202e-05, "loss": 0.1112, "step": 2076, "task_loss": 0.2223973572254181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7572086803764103, "compression_loss": 0.0, "distillation_loss": 0.13838914036750793, "epoch": 1.97, "learning_rate": 4.0852366487646384e-05, "loss": 0.1375, "step": 2077, "task_loss": 0.1297679990530014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7572288999317336, "compression_loss": 0.0, "distillation_loss": 0.17635372281074524, "epoch": 1.97, "learning_rate": 4.084412582142204e-05, "loss": 0.1725, "step": 2078, "task_loss": 0.1377314180135727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7572491131166865, "compression_loss": 0.0, "distillation_loss": 0.09490906447172165, "epoch": 1.97, "learning_rate": 4.083588227703593e-05, "loss": 0.1, "step": 2079, "task_loss": 0.1462094932794571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.757269319932273, "compression_loss": 0.0, "distillation_loss": 0.05555427446961403, "epoch": 1.98, "learning_rate": 4.0827635855985534e-05, "loss": 0.0618, "step": 2080, "task_loss": 0.11752540618181229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7572895203794965, "compression_loss": 0.0, "distillation_loss": 0.08329576253890991, "epoch": 1.98, "learning_rate": 4.081938655976886e-05, "loss": 0.0857, "step": 2081, "task_loss": 0.10723456740379333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7573097144593608, "compression_loss": 0.0, "distillation_loss": 0.1329977810382843, "epoch": 1.98, "learning_rate": 4.0811134389884433e-05, "loss": 0.1404, "step": 2082, "task_loss": 0.2066575586795807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7573299021728697, "compression_loss": 0.0, "distillation_loss": 0.03028569370508194, "epoch": 1.98, "learning_rate": 4.08028793478313e-05, "loss": 0.0541, "step": 2083, "task_loss": 0.2682817280292511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7573500835210266, "compression_loss": 0.0, "distillation_loss": 0.2019033133983612, "epoch": 1.98, "learning_rate": 4.0794621435109015e-05, "loss": 0.2013, "step": 2084, "task_loss": 0.19626502692699432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7573702585048355, "compression_loss": 0.0, "distillation_loss": 0.04804403334856033, "epoch": 1.98, "learning_rate": 4.0786360653217684e-05, "loss": 0.049, "step": 2085, "task_loss": 0.05735541135072708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7573904271252998, "compression_loss": 0.0, "distillation_loss": 0.14371657371520996, "epoch": 1.98, "learning_rate": 4.0778097003657915e-05, "loss": 0.1367, "step": 2086, "task_loss": 0.07343168556690216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7574105893834234, "compression_loss": 0.0, "distillation_loss": 0.172747403383255, "epoch": 1.98, "learning_rate": 4.0769830487930835e-05, "loss": 0.1739, "step": 2087, "task_loss": 0.18447381258010864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7574307452802099, "compression_loss": 0.0, "distillation_loss": 0.18645472824573517, "epoch": 1.98, "learning_rate": 4.07615611075381e-05, "loss": 0.1843, "step": 2088, "task_loss": 0.16447779536247253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.757450894816663, "compression_loss": 0.0, "distillation_loss": 0.09333232790231705, "epoch": 1.98, "learning_rate": 4.075328886398188e-05, "loss": 0.102, "step": 2089, "task_loss": 0.17981281876564026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7574710379937862, "compression_loss": 0.0, "distillation_loss": 0.1386614441871643, "epoch": 1.98, "learning_rate": 4.074501375876487e-05, "loss": 0.1315, "step": 2090, "task_loss": 0.0667913407087326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7574911748125835, "compression_loss": 0.0, "distillation_loss": 0.1604781299829483, "epoch": 1.99, "learning_rate": 4.073673579339028e-05, "loss": 0.155, "step": 2091, "task_loss": 0.10606161504983902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7575113052740584, "compression_loss": 0.0, "distillation_loss": 0.2468898594379425, "epoch": 1.99, "learning_rate": 4.0728454969361854e-05, "loss": 0.25, "step": 2092, "task_loss": 0.2778300940990448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7575314293792146, "compression_loss": 0.0, "distillation_loss": 0.20150130987167358, "epoch": 1.99, "learning_rate": 4.0720171288183815e-05, "loss": 0.1922, "step": 2093, "task_loss": 0.10800500959157944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7575515471290557, "compression_loss": 0.0, "distillation_loss": 0.1500411480665207, "epoch": 1.99, "learning_rate": 4.0711884751360964e-05, "loss": 0.1402, "step": 2094, "task_loss": 0.051413945853710175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7575716585245855, "compression_loss": 0.0, "distillation_loss": 0.052510011941194534, "epoch": 1.99, "learning_rate": 4.070359536039858e-05, "loss": 0.066, "step": 2095, "task_loss": 0.18779529631137848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7575917635668078, "compression_loss": 0.0, "distillation_loss": 0.2838015854358673, "epoch": 1.99, "learning_rate": 4.069530311680247e-05, "loss": 0.2762, "step": 2096, "task_loss": 0.20768359303474426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7576118622567259, "compression_loss": 0.0, "distillation_loss": 0.24412603676319122, "epoch": 1.99, "learning_rate": 4.068700802207895e-05, "loss": 0.2467, "step": 2097, "task_loss": 0.2701077461242676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7576319545953439, "compression_loss": 0.0, "distillation_loss": 0.18422961235046387, "epoch": 1.99, "learning_rate": 4.0678710077734885e-05, "loss": 0.1914, "step": 2098, "task_loss": 0.2559583783149719 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7576520405836652, "compression_loss": 0.0, "distillation_loss": 0.10283166915178299, "epoch": 1.99, "learning_rate": 4.0670409285277614e-05, "loss": 0.1053, "step": 2099, "task_loss": 0.12738347053527832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7576721202226936, "compression_loss": 0.0, "distillation_loss": 0.24559719860553741, "epoch": 1.99, "learning_rate": 4.0662105646215034e-05, "loss": 0.2482, "step": 2100, "task_loss": 0.271691232919693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7576921935134328, "compression_loss": 0.0, "distillation_loss": 0.15227892994880676, "epoch": 2.0, "learning_rate": 4.065379916205554e-05, "loss": 0.1468, "step": 2101, "task_loss": 0.09752043336629868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7577122604568864, "compression_loss": 0.0, "distillation_loss": 0.0908876582980156, "epoch": 2.0, "learning_rate": 4.0645489834308024e-05, "loss": 0.0848, "step": 2102, "task_loss": 0.02978145144879818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7577323210540582, "compression_loss": 0.0, "distillation_loss": 0.3263307809829712, "epoch": 2.0, "learning_rate": 4.063717766448194e-05, "loss": 0.3075, "step": 2103, "task_loss": 0.1381606012582779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7577523753059517, "compression_loss": 0.0, "distillation_loss": 0.15767845511436462, "epoch": 2.0, "learning_rate": 4.062886265408722e-05, "loss": 0.1544, "step": 2104, "task_loss": 0.12524300813674927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.7577724232135707, "compression_loss": 0.0, "distillation_loss": 0.15870457887649536, "epoch": 2.0, "learning_rate": 4.062054480463433e-05, "loss": 0.1576, "step": 2105, "task_loss": 0.14793536067008972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5726075510474254, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7330078136692348, "compression/magnitude_sparsity/target_sparsity_level": 0.757792464777919, "compression_loss": 0.0, "distillation_loss": 0.016080807894468307, "epoch": 2.0, "learning_rate": 4.0612224117634245e-05, "loss": 0.0148, "step": 2106, "task_loss": 0.003325019497424364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7578125, "compression_loss": 0.0, "distillation_loss": 0.8665308952331543, "epoch": 2.0, "learning_rate": 4.060390059459846e-05, "loss": 0.8038, "step": 2107, "task_loss": 0.23952460289001465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7578325288808176, "compression_loss": 0.0, "distillation_loss": 0.775876522064209, "epoch": 2.0, "learning_rate": 4.059557423703899e-05, "loss": 0.7176, "step": 2108, "task_loss": 0.1935485601425171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7578525514213754, "compression_loss": 0.0, "distillation_loss": 0.8252198696136475, "epoch": 2.0, "learning_rate": 4.058724504646834e-05, "loss": 0.7657, "step": 2109, "task_loss": 0.230499267578125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7578725676226772, "compression_loss": 0.0, "distillation_loss": 0.6135385036468506, "epoch": 2.0, "learning_rate": 4.0578913024399564e-05, "loss": 0.5737, "step": 2110, "task_loss": 0.21483993530273438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7578925774857265, "compression_loss": 0.0, "distillation_loss": 0.5455840826034546, "epoch": 2.0, "learning_rate": 4.057057817234621e-05, "loss": 0.5087, "step": 2111, "task_loss": 0.17693614959716797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.757912581011527, "compression_loss": 0.0, "distillation_loss": 0.43998754024505615, "epoch": 2.01, "learning_rate": 4.0562240491822334e-05, "loss": 0.4114, "step": 2112, "task_loss": 0.154291033744812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7579325782010825, "compression_loss": 0.0, "distillation_loss": 0.3841041922569275, "epoch": 2.01, "learning_rate": 4.055389998434253e-05, "loss": 0.3626, "step": 2113, "task_loss": 0.16882005333900452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7579525690553967, "compression_loss": 0.0, "distillation_loss": 0.4492219090461731, "epoch": 2.01, "learning_rate": 4.054555665142189e-05, "loss": 0.4199, "step": 2114, "task_loss": 0.15565484762191772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7579725535754731, "compression_loss": 0.0, "distillation_loss": 0.3286392390727997, "epoch": 2.01, "learning_rate": 4.053721049457601e-05, "loss": 0.3141, "step": 2115, "task_loss": 0.1833527535200119 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7579925317623155, "compression_loss": 0.0, "distillation_loss": 0.2822880148887634, "epoch": 2.01, "learning_rate": 4.052886151532101e-05, "loss": 0.2741, "step": 2116, "task_loss": 0.2002839893102646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7580125036169276, "compression_loss": 0.0, "distillation_loss": 0.30642810463905334, "epoch": 2.01, "learning_rate": 4.0520509715173544e-05, "loss": 0.2994, "step": 2117, "task_loss": 0.235684335231781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7580324691403132, "compression_loss": 0.0, "distillation_loss": 0.20797675848007202, "epoch": 2.01, "learning_rate": 4.051215509565073e-05, "loss": 0.2032, "step": 2118, "task_loss": 0.16037693619728088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7580524283334756, "compression_loss": 0.0, "distillation_loss": 0.12344904243946075, "epoch": 2.01, "learning_rate": 4.050379765827024e-05, "loss": 0.1167, "step": 2119, "task_loss": 0.056173477321863174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7580723811974189, "compression_loss": 0.0, "distillation_loss": 0.1896042823791504, "epoch": 2.01, "learning_rate": 4.0495437404550233e-05, "loss": 0.179, "step": 2120, "task_loss": 0.08344145119190216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7580923277331465, "compression_loss": 0.0, "distillation_loss": 0.16051128506660461, "epoch": 2.01, "learning_rate": 4.04870743360094e-05, "loss": 0.1519, "step": 2121, "task_loss": 0.07446075230836868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7581122679416623, "compression_loss": 0.0, "distillation_loss": 0.37767481803894043, "epoch": 2.02, "learning_rate": 4.047870845416693e-05, "loss": 0.3772, "step": 2122, "task_loss": 0.3733970522880554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7581322018239698, "compression_loss": 0.0, "distillation_loss": 0.21346813440322876, "epoch": 2.02, "learning_rate": 4.0470339760542506e-05, "loss": 0.2017, "step": 2123, "task_loss": 0.09529842436313629 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7581521293810727, "compression_loss": 0.0, "distillation_loss": 0.6465510129928589, "epoch": 2.02, "learning_rate": 4.0461968256656376e-05, "loss": 0.6242, "step": 2124, "task_loss": 0.42273980379104614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7581720506139747, "compression_loss": 0.0, "distillation_loss": 0.4466746747493744, "epoch": 2.02, "learning_rate": 4.045359394402925e-05, "loss": 0.4386, "step": 2125, "task_loss": 0.366089791059494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7581919655236797, "compression_loss": 0.0, "distillation_loss": 0.05343036353588104, "epoch": 2.02, "learning_rate": 4.0445216824182344e-05, "loss": 0.0488, "step": 2126, "task_loss": 0.006774421781301498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7582118741111911, "compression_loss": 0.0, "distillation_loss": 0.042159806936979294, "epoch": 2.02, "learning_rate": 4.043683689863742e-05, "loss": 0.0449, "step": 2127, "task_loss": 0.06956613063812256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7582317763775126, "compression_loss": 0.0, "distillation_loss": 0.3928431272506714, "epoch": 2.02, "learning_rate": 4.042845416891673e-05, "loss": 0.3936, "step": 2128, "task_loss": 0.3999135494232178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7582516723236481, "compression_loss": 0.0, "distillation_loss": 0.22617900371551514, "epoch": 2.02, "learning_rate": 4.042006863654303e-05, "loss": 0.245, "step": 2129, "task_loss": 0.4148496389389038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.758271561950601, "compression_loss": 0.0, "distillation_loss": 0.21618734300136566, "epoch": 2.02, "learning_rate": 4.041168030303961e-05, "loss": 0.207, "step": 2130, "task_loss": 0.12459345906972885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7582914452593752, "compression_loss": 0.0, "distillation_loss": 0.398822546005249, "epoch": 2.02, "learning_rate": 4.0403289169930235e-05, "loss": 0.3847, "step": 2131, "task_loss": 0.2576183080673218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7583113222509743, "compression_loss": 0.0, "distillation_loss": 0.16397595405578613, "epoch": 2.02, "learning_rate": 4.03948952387392e-05, "loss": 0.1636, "step": 2132, "task_loss": 0.15986183285713196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.758331192926402, "compression_loss": 0.0, "distillation_loss": 0.19217489659786224, "epoch": 2.03, "learning_rate": 4.03864985109913e-05, "loss": 0.1823, "step": 2133, "task_loss": 0.09319883584976196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.758351057286662, "compression_loss": 0.0, "distillation_loss": 0.16633236408233643, "epoch": 2.03, "learning_rate": 4.0378098988211845e-05, "loss": 0.1563, "step": 2134, "task_loss": 0.06592224538326263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7583709153327578, "compression_loss": 0.0, "distillation_loss": 0.1656203418970108, "epoch": 2.03, "learning_rate": 4.036969667192665e-05, "loss": 0.1591, "step": 2135, "task_loss": 0.10005275160074234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7583907670656934, "compression_loss": 0.0, "distillation_loss": 0.1509263813495636, "epoch": 2.03, "learning_rate": 4.036129156366203e-05, "loss": 0.1424, "step": 2136, "task_loss": 0.0660940557718277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7584106124864722, "compression_loss": 0.0, "distillation_loss": 0.1414584517478943, "epoch": 2.03, "learning_rate": 4.0352883664944816e-05, "loss": 0.137, "step": 2137, "task_loss": 0.09674646705389023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7584304515960981, "compression_loss": 0.0, "distillation_loss": 0.10992178320884705, "epoch": 2.03, "learning_rate": 4.034447297730234e-05, "loss": 0.1093, "step": 2138, "task_loss": 0.10394454747438431 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7584502843955747, "compression_loss": 0.0, "distillation_loss": 0.2345104068517685, "epoch": 2.03, "learning_rate": 4.033605950226246e-05, "loss": 0.2226, "step": 2139, "task_loss": 0.115862175822258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7584701108859055, "compression_loss": 0.0, "distillation_loss": 0.15543803572654724, "epoch": 2.03, "learning_rate": 4.03276432413535e-05, "loss": 0.1514, "step": 2140, "task_loss": 0.11481384187936783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7584899310680945, "compression_loss": 0.0, "distillation_loss": 0.12512551248073578, "epoch": 2.03, "learning_rate": 4.0319224196104334e-05, "loss": 0.1191, "step": 2141, "task_loss": 0.06533493101596832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7585097449431452, "compression_loss": 0.0, "distillation_loss": 0.09096786379814148, "epoch": 2.03, "learning_rate": 4.031080236804431e-05, "loss": 0.0902, "step": 2142, "task_loss": 0.08328302204608917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7585295525120613, "compression_loss": 0.0, "distillation_loss": 0.10509166121482849, "epoch": 2.04, "learning_rate": 4.03023777587033e-05, "loss": 0.0993, "step": 2143, "task_loss": 0.04766244813799858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7585493537758464, "compression_loss": 0.0, "distillation_loss": 0.1337359994649887, "epoch": 2.04, "learning_rate": 4.029395036961168e-05, "loss": 0.1341, "step": 2144, "task_loss": 0.13710615038871765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7585691487355044, "compression_loss": 0.0, "distillation_loss": 0.10559399425983429, "epoch": 2.04, "learning_rate": 4.028552020230031e-05, "loss": 0.0992, "step": 2145, "task_loss": 0.042120400816202164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7585889373920388, "compression_loss": 0.0, "distillation_loss": 0.04972688853740692, "epoch": 2.04, "learning_rate": 4.0277087258300575e-05, "loss": 0.0459, "step": 2146, "task_loss": 0.01133745163679123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7586087197464534, "compression_loss": 0.0, "distillation_loss": 0.16767317056655884, "epoch": 2.04, "learning_rate": 4.0268651539144374e-05, "loss": 0.1856, "step": 2147, "task_loss": 0.3467387855052948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7586284957997518, "compression_loss": 0.0, "distillation_loss": 0.17021751403808594, "epoch": 2.04, "learning_rate": 4.026021304636408e-05, "loss": 0.1609, "step": 2148, "task_loss": 0.07744796574115753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7586482655529376, "compression_loss": 0.0, "distillation_loss": 0.2166343480348587, "epoch": 2.04, "learning_rate": 4.0251771781492594e-05, "loss": 0.2155, "step": 2149, "task_loss": 0.20558899641036987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7586680290070147, "compression_loss": 0.0, "distillation_loss": 0.23501603305339813, "epoch": 2.04, "learning_rate": 4.0243327746063315e-05, "loss": 0.2259, "step": 2150, "task_loss": 0.14403802156448364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7586877861629866, "compression_loss": 0.0, "distillation_loss": 0.21590901911258698, "epoch": 2.04, "learning_rate": 4.0234880941610134e-05, "loss": 0.2069, "step": 2151, "task_loss": 0.1256270408630371 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.758707537021857, "compression_loss": 0.0, "distillation_loss": 0.22788038849830627, "epoch": 2.04, "learning_rate": 4.022643136966746e-05, "loss": 0.2208, "step": 2152, "task_loss": 0.1574449986219406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7587272815846298, "compression_loss": 0.0, "distillation_loss": 0.34770405292510986, "epoch": 2.04, "learning_rate": 4.021797903177019e-05, "loss": 0.336, "step": 2153, "task_loss": 0.23103071749210358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7587470198523083, "compression_loss": 0.0, "distillation_loss": 0.13833436369895935, "epoch": 2.05, "learning_rate": 4.0209523929453744e-05, "loss": 0.1318, "step": 2154, "task_loss": 0.0730195865035057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7587667518258966, "compression_loss": 0.0, "distillation_loss": 0.1359223574399948, "epoch": 2.05, "learning_rate": 4.0201066064254026e-05, "loss": 0.1326, "step": 2155, "task_loss": 0.10307220369577408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.758786477506398, "compression_loss": 0.0, "distillation_loss": 0.19817477464675903, "epoch": 2.05, "learning_rate": 4.019260543770745e-05, "loss": 0.1921, "step": 2156, "task_loss": 0.13708311319351196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7588061968948164, "compression_loss": 0.0, "distillation_loss": 0.1101415753364563, "epoch": 2.05, "learning_rate": 4.018414205135093e-05, "loss": 0.1024, "step": 2157, "task_loss": 0.03281085193157196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7588259099921556, "compression_loss": 0.0, "distillation_loss": 0.2274709939956665, "epoch": 2.05, "learning_rate": 4.017567590672187e-05, "loss": 0.2163, "step": 2158, "task_loss": 0.11540090292692184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.758845616799419, "compression_loss": 0.0, "distillation_loss": 0.26632124185562134, "epoch": 2.05, "learning_rate": 4.01672070053582e-05, "loss": 0.2622, "step": 2159, "task_loss": 0.2254357933998108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7588653173176103, "compression_loss": 0.0, "distillation_loss": 0.32481926679611206, "epoch": 2.05, "learning_rate": 4.015873534879833e-05, "loss": 0.3212, "step": 2160, "task_loss": 0.288277268409729 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7588850115477335, "compression_loss": 0.0, "distillation_loss": 0.2757709324359894, "epoch": 2.05, "learning_rate": 4.015026093858119e-05, "loss": 0.2797, "step": 2161, "task_loss": 0.3151787519454956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7589046994907919, "compression_loss": 0.0, "distillation_loss": 0.07919088006019592, "epoch": 2.05, "learning_rate": 4.014178377624617e-05, "loss": 0.077, "step": 2162, "task_loss": 0.05737023800611496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7589243811477894, "compression_loss": 0.0, "distillation_loss": 0.13900256156921387, "epoch": 2.05, "learning_rate": 4.013330386333321e-05, "loss": 0.138, "step": 2163, "task_loss": 0.12849166989326477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7589440565197296, "compression_loss": 0.0, "distillation_loss": 0.14013248682022095, "epoch": 2.06, "learning_rate": 4.012482120138272e-05, "loss": 0.1338, "step": 2164, "task_loss": 0.07710616290569305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7589637256076164, "compression_loss": 0.0, "distillation_loss": 0.05423755198717117, "epoch": 2.06, "learning_rate": 4.011633579193561e-05, "loss": 0.0512, "step": 2165, "task_loss": 0.024237103760242462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7589833884124532, "compression_loss": 0.0, "distillation_loss": 0.20200000703334808, "epoch": 2.06, "learning_rate": 4.010784763653331e-05, "loss": 0.2011, "step": 2166, "task_loss": 0.19308245182037354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7590030449352436, "compression_loss": 0.0, "distillation_loss": 0.20488443970680237, "epoch": 2.06, "learning_rate": 4.0099356736717725e-05, "loss": 0.2045, "step": 2167, "task_loss": 0.20138651132583618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7590226951769916, "compression_loss": 0.0, "distillation_loss": 0.10349798947572708, "epoch": 2.06, "learning_rate": 4.0090863094031274e-05, "loss": 0.1, "step": 2168, "task_loss": 0.06868550926446915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7590423391387008, "compression_loss": 0.0, "distillation_loss": 0.09170787781476974, "epoch": 2.06, "learning_rate": 4.008236671001686e-05, "loss": 0.0931, "step": 2169, "task_loss": 0.10580511391162872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7590619768213748, "compression_loss": 0.0, "distillation_loss": 0.15538711845874786, "epoch": 2.06, "learning_rate": 4.0073867586217895e-05, "loss": 0.1487, "step": 2170, "task_loss": 0.08846833556890488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7590816082260172, "compression_loss": 0.0, "distillation_loss": 0.1697424054145813, "epoch": 2.06, "learning_rate": 4.006536572417828e-05, "loss": 0.1626, "step": 2171, "task_loss": 0.09881134331226349 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7591012333536319, "compression_loss": 0.0, "distillation_loss": 0.13482381403446198, "epoch": 2.06, "learning_rate": 4.0056861125442435e-05, "loss": 0.1331, "step": 2172, "task_loss": 0.11754244565963745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7591208522052225, "compression_loss": 0.0, "distillation_loss": 0.29298990964889526, "epoch": 2.06, "learning_rate": 4.004835379155525e-05, "loss": 0.2928, "step": 2173, "task_loss": 0.291414350271225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7591404647817925, "compression_loss": 0.0, "distillation_loss": 0.09788675606250763, "epoch": 2.06, "learning_rate": 4.003984372406212e-05, "loss": 0.0913, "step": 2174, "task_loss": 0.03239801526069641 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7591600710843459, "compression_loss": 0.0, "distillation_loss": 0.05986270681023598, "epoch": 2.07, "learning_rate": 4.003133092450895e-05, "loss": 0.0661, "step": 2175, "task_loss": 0.1226883977651596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7591796711138862, "compression_loss": 0.0, "distillation_loss": 0.038474880158901215, "epoch": 2.07, "learning_rate": 4.002281539444213e-05, "loss": 0.0356, "step": 2176, "task_loss": 0.00936584360897541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.759199264871417, "compression_loss": 0.0, "distillation_loss": 0.11121018975973129, "epoch": 2.07, "learning_rate": 4.001429713540853e-05, "loss": 0.1127, "step": 2177, "task_loss": 0.12647242844104767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7592188523579422, "compression_loss": 0.0, "distillation_loss": 0.11997029185295105, "epoch": 2.07, "learning_rate": 4.000577614895555e-05, "loss": 0.1197, "step": 2178, "task_loss": 0.11679819226264954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7592384335744653, "compression_loss": 0.0, "distillation_loss": 0.2858255207538605, "epoch": 2.07, "learning_rate": 3.999725243663107e-05, "loss": 0.2816, "step": 2179, "task_loss": 0.24326691031455994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.75925800852199, "compression_loss": 0.0, "distillation_loss": 0.09184737503528595, "epoch": 2.07, "learning_rate": 3.9988725999983456e-05, "loss": 0.0963, "step": 2180, "task_loss": 0.13659027218818665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7592775772015201, "compression_loss": 0.0, "distillation_loss": 0.11890025436878204, "epoch": 2.07, "learning_rate": 3.998019684056158e-05, "loss": 0.121, "step": 2181, "task_loss": 0.13989683985710144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7592971396140592, "compression_loss": 0.0, "distillation_loss": 0.189500093460083, "epoch": 2.07, "learning_rate": 3.99716649599148e-05, "loss": 0.1928, "step": 2182, "task_loss": 0.22282639145851135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.759316695760611, "compression_loss": 0.0, "distillation_loss": 0.23618614673614502, "epoch": 2.07, "learning_rate": 3.996313035959297e-05, "loss": 0.2329, "step": 2183, "task_loss": 0.20297005772590637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7593362456421792, "compression_loss": 0.0, "distillation_loss": 0.09011323750019073, "epoch": 2.07, "learning_rate": 3.995459304114645e-05, "loss": 0.0857, "step": 2184, "task_loss": 0.04603835195302963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7593557892597673, "compression_loss": 0.0, "distillation_loss": 0.14607930183410645, "epoch": 2.08, "learning_rate": 3.9946053006126086e-05, "loss": 0.1419, "step": 2185, "task_loss": 0.1042981743812561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7593753266143792, "compression_loss": 0.0, "distillation_loss": 0.12250328063964844, "epoch": 2.08, "learning_rate": 3.993751025608321e-05, "loss": 0.1182, "step": 2186, "task_loss": 0.07953470945358276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7593948577070186, "compression_loss": 0.0, "distillation_loss": 0.1935318261384964, "epoch": 2.08, "learning_rate": 3.9928964792569655e-05, "loss": 0.1838, "step": 2187, "task_loss": 0.09669722616672516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7594143825386891, "compression_loss": 0.0, "distillation_loss": 0.07566056400537491, "epoch": 2.08, "learning_rate": 3.9920416617137745e-05, "loss": 0.0751, "step": 2188, "task_loss": 0.07045575231313705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7594339011103943, "compression_loss": 0.0, "distillation_loss": 0.05975474417209625, "epoch": 2.08, "learning_rate": 3.9911865731340306e-05, "loss": 0.0558, "step": 2189, "task_loss": 0.020496994256973267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7594534134231381, "compression_loss": 0.0, "distillation_loss": 0.1242765486240387, "epoch": 2.08, "learning_rate": 3.9903312136730634e-05, "loss": 0.1215, "step": 2190, "task_loss": 0.09669384360313416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7594729194779241, "compression_loss": 0.0, "distillation_loss": 0.15067118406295776, "epoch": 2.08, "learning_rate": 3.989475583486254e-05, "loss": 0.1422, "step": 2191, "task_loss": 0.06594227254390717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7594924192757557, "compression_loss": 0.0, "distillation_loss": 0.15141010284423828, "epoch": 2.08, "learning_rate": 3.988619682729032e-05, "loss": 0.1463, "step": 2192, "task_loss": 0.10027869790792465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.759511912817637, "compression_loss": 0.0, "distillation_loss": 0.08504438400268555, "epoch": 2.08, "learning_rate": 3.987763511556874e-05, "loss": 0.0815, "step": 2193, "task_loss": 0.0492318794131279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7595314001045714, "compression_loss": 0.0, "distillation_loss": 0.03857031837105751, "epoch": 2.08, "learning_rate": 3.98690707012531e-05, "loss": 0.0354, "step": 2194, "task_loss": 0.006879139691591263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7595508811375626, "compression_loss": 0.0, "distillation_loss": 0.12435384094715118, "epoch": 2.08, "learning_rate": 3.986050358589916e-05, "loss": 0.1221, "step": 2195, "task_loss": 0.10201095044612885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7595703559176146, "compression_loss": 0.0, "distillation_loss": 0.13030223548412323, "epoch": 2.09, "learning_rate": 3.9851933771063166e-05, "loss": 0.1234, "step": 2196, "task_loss": 0.06090515851974487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7595898244457306, "compression_loss": 0.0, "distillation_loss": 0.21584641933441162, "epoch": 2.09, "learning_rate": 3.9843361258301876e-05, "loss": 0.204, "step": 2197, "task_loss": 0.09771460294723511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7596092867229147, "compression_loss": 0.0, "distillation_loss": 0.09045465290546417, "epoch": 2.09, "learning_rate": 3.983478604917253e-05, "loss": 0.0834, "step": 2198, "task_loss": 0.01959792897105217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7596287427501703, "compression_loss": 0.0, "distillation_loss": 0.17018280923366547, "epoch": 2.09, "learning_rate": 3.9826208145232855e-05, "loss": 0.1639, "step": 2199, "task_loss": 0.10730506479740143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7596481925285012, "compression_loss": 0.0, "distillation_loss": 0.0529235415160656, "epoch": 2.09, "learning_rate": 3.981762754804107e-05, "loss": 0.0491, "step": 2200, "task_loss": 0.015178944915533066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7596676360589112, "compression_loss": 0.0, "distillation_loss": 0.16582679748535156, "epoch": 2.09, "learning_rate": 3.980904425915586e-05, "loss": 0.157, "step": 2201, "task_loss": 0.07799357175827026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7596870733424037, "compression_loss": 0.0, "distillation_loss": 0.12228982895612717, "epoch": 2.09, "learning_rate": 3.9800458280136453e-05, "loss": 0.1221, "step": 2202, "task_loss": 0.12067455798387527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7597065043799826, "compression_loss": 0.0, "distillation_loss": 0.28389573097229004, "epoch": 2.09, "learning_rate": 3.979186961254252e-05, "loss": 0.2762, "step": 2203, "task_loss": 0.2065102607011795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7597259291726515, "compression_loss": 0.0, "distillation_loss": 0.04436230659484863, "epoch": 2.09, "learning_rate": 3.9783278257934233e-05, "loss": 0.0407, "step": 2204, "task_loss": 0.007897831499576569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7597453477214141, "compression_loss": 0.0, "distillation_loss": 0.17565417289733887, "epoch": 2.09, "learning_rate": 3.977468421787225e-05, "loss": 0.169, "step": 2205, "task_loss": 0.10887274146080017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7597647600272741, "compression_loss": 0.0, "distillation_loss": 0.15632234513759613, "epoch": 2.09, "learning_rate": 3.976608749391773e-05, "loss": 0.1454, "step": 2206, "task_loss": 0.04727863147854805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7597841660912351, "compression_loss": 0.0, "distillation_loss": 0.2122887820005417, "epoch": 2.1, "learning_rate": 3.975748808763229e-05, "loss": 0.2159, "step": 2207, "task_loss": 0.24815846979618073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7598035659143009, "compression_loss": 0.0, "distillation_loss": 0.2334968000650406, "epoch": 2.1, "learning_rate": 3.974888600057808e-05, "loss": 0.2257, "step": 2208, "task_loss": 0.15524537861347198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7598229594974751, "compression_loss": 0.0, "distillation_loss": 0.2860737442970276, "epoch": 2.1, "learning_rate": 3.974028123431769e-05, "loss": 0.2848, "step": 2209, "task_loss": 0.27306628227233887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7598423468417614, "compression_loss": 0.0, "distillation_loss": 0.17595066130161285, "epoch": 2.1, "learning_rate": 3.973167379041421e-05, "loss": 0.186, "step": 2210, "task_loss": 0.2765977382659912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7598617279481636, "compression_loss": 0.0, "distillation_loss": 0.049867670983076096, "epoch": 2.1, "learning_rate": 3.972306367043126e-05, "loss": 0.0474, "step": 2211, "task_loss": 0.02561107836663723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7598811028176852, "compression_loss": 0.0, "distillation_loss": 0.13108716905117035, "epoch": 2.1, "learning_rate": 3.971445087593288e-05, "loss": 0.1296, "step": 2212, "task_loss": 0.11605143547058105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7599004714513299, "compression_loss": 0.0, "distillation_loss": 0.15945231914520264, "epoch": 2.1, "learning_rate": 3.970583540848363e-05, "loss": 0.1644, "step": 2213, "task_loss": 0.20883622765541077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7599198338501015, "compression_loss": 0.0, "distillation_loss": 0.3196389973163605, "epoch": 2.1, "learning_rate": 3.969721726964856e-05, "loss": 0.3091, "step": 2214, "task_loss": 0.21398873627185822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7599391900150037, "compression_loss": 0.0, "distillation_loss": 0.30575287342071533, "epoch": 2.1, "learning_rate": 3.9688596460993176e-05, "loss": 0.29, "step": 2215, "task_loss": 0.1479155719280243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7599585399470399, "compression_loss": 0.0, "distillation_loss": 0.08897826820611954, "epoch": 2.1, "learning_rate": 3.967997298408352e-05, "loss": 0.0882, "step": 2216, "task_loss": 0.08128625154495239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7599778836472142, "compression_loss": 0.0, "distillation_loss": 0.0864962711930275, "epoch": 2.11, "learning_rate": 3.967134684048607e-05, "loss": 0.0896, "step": 2217, "task_loss": 0.11729071289300919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7599972211165299, "compression_loss": 0.0, "distillation_loss": 0.23064328730106354, "epoch": 2.11, "learning_rate": 3.96627180317678e-05, "loss": 0.2177, "step": 2218, "task_loss": 0.10095411539077759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.760016552355991, "compression_loss": 0.0, "distillation_loss": 0.13302667438983917, "epoch": 2.11, "learning_rate": 3.965408655949619e-05, "loss": 0.141, "step": 2219, "task_loss": 0.21288727223873138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7600358773666008, "compression_loss": 0.0, "distillation_loss": 0.27681732177734375, "epoch": 2.11, "learning_rate": 3.964545242523917e-05, "loss": 0.2902, "step": 2220, "task_loss": 0.41104960441589355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7600551961493635, "compression_loss": 0.0, "distillation_loss": 0.0774826779961586, "epoch": 2.11, "learning_rate": 3.9636815630565194e-05, "loss": 0.0723, "step": 2221, "task_loss": 0.026051845401525497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7600745087052823, "compression_loss": 0.0, "distillation_loss": 0.0630701333284378, "epoch": 2.11, "learning_rate": 3.962817617704317e-05, "loss": 0.0647, "step": 2222, "task_loss": 0.07926364988088608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7600938150353611, "compression_loss": 0.0, "distillation_loss": 0.22707828879356384, "epoch": 2.11, "learning_rate": 3.9619534066242485e-05, "loss": 0.2251, "step": 2223, "task_loss": 0.20687554776668549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7601131151406035, "compression_loss": 0.0, "distillation_loss": 0.20871593058109283, "epoch": 2.11, "learning_rate": 3.961088929973303e-05, "loss": 0.2074, "step": 2224, "task_loss": 0.1955493688583374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7601324090220134, "compression_loss": 0.0, "distillation_loss": 0.08324743062257767, "epoch": 2.11, "learning_rate": 3.960224187908518e-05, "loss": 0.079, "step": 2225, "task_loss": 0.04101306200027466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7601516966805942, "compression_loss": 0.0, "distillation_loss": 0.22678276896476746, "epoch": 2.11, "learning_rate": 3.959359180586975e-05, "loss": 0.236, "step": 2226, "task_loss": 0.31925228238105774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7601709781173498, "compression_loss": 0.0, "distillation_loss": 0.15277154743671417, "epoch": 2.11, "learning_rate": 3.958493908165809e-05, "loss": 0.1434, "step": 2227, "task_loss": 0.05934750288724899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7601902533332837, "compression_loss": 0.0, "distillation_loss": 0.13886742293834686, "epoch": 2.12, "learning_rate": 3.9576283708022e-05, "loss": 0.1342, "step": 2228, "task_loss": 0.09246313571929932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7602095223293998, "compression_loss": 0.0, "distillation_loss": 0.12244449555873871, "epoch": 2.12, "learning_rate": 3.956762568653378e-05, "loss": 0.1165, "step": 2229, "task_loss": 0.06263736635446548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7602287851067016, "compression_loss": 0.0, "distillation_loss": 0.11867455393075943, "epoch": 2.12, "learning_rate": 3.95589650187662e-05, "loss": 0.1288, "step": 2230, "task_loss": 0.21948017179965973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7602480416661928, "compression_loss": 0.0, "distillation_loss": 0.17664200067520142, "epoch": 2.12, "learning_rate": 3.95503017062925e-05, "loss": 0.1744, "step": 2231, "task_loss": 0.15381231904029846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.760267292008877, "compression_loss": 0.0, "distillation_loss": 0.10297991335391998, "epoch": 2.12, "learning_rate": 3.954163575068643e-05, "loss": 0.0943, "step": 2232, "task_loss": 0.01588231697678566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7602865361357581, "compression_loss": 0.0, "distillation_loss": 0.0686907023191452, "epoch": 2.12, "learning_rate": 3.953296715352218e-05, "loss": 0.0713, "step": 2233, "task_loss": 0.09433852881193161 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7603057740478397, "compression_loss": 0.0, "distillation_loss": 0.11666402965784073, "epoch": 2.12, "learning_rate": 3.952429591637446e-05, "loss": 0.1239, "step": 2234, "task_loss": 0.18875601887702942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7603250057461255, "compression_loss": 0.0, "distillation_loss": 0.02542857825756073, "epoch": 2.12, "learning_rate": 3.951562204081845e-05, "loss": 0.0243, "step": 2235, "task_loss": 0.014179892838001251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7603442312316191, "compression_loss": 0.0, "distillation_loss": 0.024139802902936935, "epoch": 2.12, "learning_rate": 3.950694552842977e-05, "loss": 0.0222, "step": 2236, "task_loss": 0.0047469362616539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7603634505053243, "compression_loss": 0.0, "distillation_loss": 0.2176014482975006, "epoch": 2.12, "learning_rate": 3.949826638078457e-05, "loss": 0.2075, "step": 2237, "task_loss": 0.11611790210008621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7603826635682446, "compression_loss": 0.0, "distillation_loss": 0.1716989278793335, "epoch": 2.13, "learning_rate": 3.948958459945946e-05, "loss": 0.1658, "step": 2238, "task_loss": 0.11308705061674118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7604018704213837, "compression_loss": 0.0, "distillation_loss": 0.3528546392917633, "epoch": 2.13, "learning_rate": 3.948090018603153e-05, "loss": 0.3415, "step": 2239, "task_loss": 0.2390662580728531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7604210710657455, "compression_loss": 0.0, "distillation_loss": 0.20263050496578217, "epoch": 2.13, "learning_rate": 3.947221314207834e-05, "loss": 0.2039, "step": 2240, "task_loss": 0.21529477834701538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7604402655023336, "compression_loss": 0.0, "distillation_loss": 0.11417360603809357, "epoch": 2.13, "learning_rate": 3.9463523469177935e-05, "loss": 0.1254, "step": 2241, "task_loss": 0.22623518109321594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7604594537321516, "compression_loss": 0.0, "distillation_loss": 0.05411672592163086, "epoch": 2.13, "learning_rate": 3.9454831168908824e-05, "loss": 0.0636, "step": 2242, "task_loss": 0.14903995394706726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7604786357562032, "compression_loss": 0.0, "distillation_loss": 0.09386488795280457, "epoch": 2.13, "learning_rate": 3.9446136242850025e-05, "loss": 0.1017, "step": 2243, "task_loss": 0.1717870980501175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7604978115754921, "compression_loss": 0.0, "distillation_loss": 0.056300774216651917, "epoch": 2.13, "learning_rate": 3.9437438692581e-05, "loss": 0.0646, "step": 2244, "task_loss": 0.1389637589454651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.760516981191022, "compression_loss": 0.0, "distillation_loss": 0.0845174565911293, "epoch": 2.13, "learning_rate": 3.9428738519681704e-05, "loss": 0.0873, "step": 2245, "task_loss": 0.11270405352115631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7605361446037966, "compression_loss": 0.0, "distillation_loss": 0.08052958548069, "epoch": 2.13, "learning_rate": 3.942003572573257e-05, "loss": 0.0794, "step": 2246, "task_loss": 0.06879500299692154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7605553018148195, "compression_loss": 0.0, "distillation_loss": 0.08333942294120789, "epoch": 2.13, "learning_rate": 3.94113303123145e-05, "loss": 0.0817, "step": 2247, "task_loss": 0.06727690994739532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7605744528250944, "compression_loss": 0.0, "distillation_loss": 0.11436310410499573, "epoch": 2.13, "learning_rate": 3.9402622281008874e-05, "loss": 0.1101, "step": 2248, "task_loss": 0.07128392904996872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7605935976356251, "compression_loss": 0.0, "distillation_loss": 0.2693362832069397, "epoch": 2.14, "learning_rate": 3.939391163339754e-05, "loss": 0.2577, "step": 2249, "task_loss": 0.1526666134595871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7606127362474151, "compression_loss": 0.0, "distillation_loss": 0.07121552526950836, "epoch": 2.14, "learning_rate": 3.9385198371062845e-05, "loss": 0.0725, "step": 2250, "task_loss": 0.08450750261545181 }, { "epoch": 2.14, "eval_accuracy": 0.8818807339449541, "eval_loss": 0.48909926414489746, "eval_runtime": 18.0472, "eval_samples_per_second": 48.318, "eval_steps_per_second": 6.04, "step": 2250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7606318686614681, "compression_loss": 0.0, "distillation_loss": 0.31588560342788696, "epoch": 2.14, "learning_rate": 3.937648249558758e-05, "loss": 0.3102, "step": 2251, "task_loss": 0.25923851132392883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7606509948787881, "compression_loss": 0.0, "distillation_loss": 0.0453728511929512, "epoch": 2.14, "learning_rate": 3.9367764008555034e-05, "loss": 0.0592, "step": 2252, "task_loss": 0.18407374620437622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7606701149003784, "compression_loss": 0.0, "distillation_loss": 0.16821657121181488, "epoch": 2.14, "learning_rate": 3.9359042911548955e-05, "loss": 0.1716, "step": 2253, "task_loss": 0.20164065062999725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7606892287272428, "compression_loss": 0.0, "distillation_loss": 0.1436726152896881, "epoch": 2.14, "learning_rate": 3.935031920615358e-05, "loss": 0.1497, "step": 2254, "task_loss": 0.20403361320495605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.760708336360385, "compression_loss": 0.0, "distillation_loss": 0.40433016419410706, "epoch": 2.14, "learning_rate": 3.934159289395361e-05, "loss": 0.3865, "step": 2255, "task_loss": 0.22571320831775665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7607274378008088, "compression_loss": 0.0, "distillation_loss": 0.1408395618200302, "epoch": 2.14, "learning_rate": 3.9332863976534225e-05, "loss": 0.1363, "step": 2256, "task_loss": 0.09514202177524567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7607465330495176, "compression_loss": 0.0, "distillation_loss": 0.18185870349407196, "epoch": 2.14, "learning_rate": 3.9324132455481064e-05, "loss": 0.173, "step": 2257, "task_loss": 0.09325390309095383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7607656221075153, "compression_loss": 0.0, "distillation_loss": 0.1712486296892166, "epoch": 2.14, "learning_rate": 3.931539833238026e-05, "loss": 0.1596, "step": 2258, "task_loss": 0.05442361161112785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7607847049758056, "compression_loss": 0.0, "distillation_loss": 0.15352007746696472, "epoch": 2.15, "learning_rate": 3.930666160881841e-05, "loss": 0.1534, "step": 2259, "task_loss": 0.15225310623645782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7608037816553921, "compression_loss": 0.0, "distillation_loss": 0.15940865874290466, "epoch": 2.15, "learning_rate": 3.9297922286382573e-05, "loss": 0.1447, "step": 2260, "task_loss": 0.012211665511131287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7608228521472785, "compression_loss": 0.0, "distillation_loss": 0.03598247095942497, "epoch": 2.15, "learning_rate": 3.928918036666029e-05, "loss": 0.0342, "step": 2261, "task_loss": 0.01777983456850052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7608419164524685, "compression_loss": 0.0, "distillation_loss": 0.17762696743011475, "epoch": 2.15, "learning_rate": 3.928043585123957e-05, "loss": 0.1757, "step": 2262, "task_loss": 0.15850163996219635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7608609745719657, "compression_loss": 0.0, "distillation_loss": 0.0662430077791214, "epoch": 2.15, "learning_rate": 3.927168874170891e-05, "loss": 0.0623, "step": 2263, "task_loss": 0.026589026674628258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7608800265067739, "compression_loss": 0.0, "distillation_loss": 0.07591673731803894, "epoch": 2.15, "learning_rate": 3.926293903965726e-05, "loss": 0.0699, "step": 2264, "task_loss": 0.01550477184355259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7608990722578967, "compression_loss": 0.0, "distillation_loss": 0.11433359235525131, "epoch": 2.15, "learning_rate": 3.925418674667405e-05, "loss": 0.1113, "step": 2265, "task_loss": 0.08434649556875229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7609181118263377, "compression_loss": 0.0, "distillation_loss": 0.1352238655090332, "epoch": 2.15, "learning_rate": 3.924543186434915e-05, "loss": 0.1273, "step": 2266, "task_loss": 0.05604308471083641 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7609371452131007, "compression_loss": 0.0, "distillation_loss": 0.17770247161388397, "epoch": 2.15, "learning_rate": 3.923667439427295e-05, "loss": 0.1764, "step": 2267, "task_loss": 0.1651407778263092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7609561724191896, "compression_loss": 0.0, "distillation_loss": 0.06021641939878464, "epoch": 2.15, "learning_rate": 3.922791433803629e-05, "loss": 0.0652, "step": 2268, "task_loss": 0.1097673624753952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7609751934456076, "compression_loss": 0.0, "distillation_loss": 0.057901740074157715, "epoch": 2.15, "learning_rate": 3.921915169723046e-05, "loss": 0.0638, "step": 2269, "task_loss": 0.11656754463911057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7609942082933588, "compression_loss": 0.0, "distillation_loss": 0.12669652700424194, "epoch": 2.16, "learning_rate": 3.921038647344725e-05, "loss": 0.1198, "step": 2270, "task_loss": 0.057524219155311584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7610132169634467, "compression_loss": 0.0, "distillation_loss": 0.11540260910987854, "epoch": 2.16, "learning_rate": 3.920161866827889e-05, "loss": 0.112, "step": 2271, "task_loss": 0.08180786669254303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7610322194568749, "compression_loss": 0.0, "distillation_loss": 0.08194451034069061, "epoch": 2.16, "learning_rate": 3.9192848283318114e-05, "loss": 0.0761, "step": 2272, "task_loss": 0.023745771497488022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7610512157746473, "compression_loss": 0.0, "distillation_loss": 0.09653206914663315, "epoch": 2.16, "learning_rate": 3.91840753201581e-05, "loss": 0.0989, "step": 2273, "task_loss": 0.11974017322063446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7610702059177674, "compression_loss": 0.0, "distillation_loss": 0.17683292925357819, "epoch": 2.16, "learning_rate": 3.917529978039247e-05, "loss": 0.1677, "step": 2274, "task_loss": 0.08567538112401962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761089189887239, "compression_loss": 0.0, "distillation_loss": 0.15673840045928955, "epoch": 2.16, "learning_rate": 3.9166521665615386e-05, "loss": 0.1618, "step": 2275, "task_loss": 0.20756980776786804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7611081676840656, "compression_loss": 0.0, "distillation_loss": 0.16288813948631287, "epoch": 2.16, "learning_rate": 3.91577409774214e-05, "loss": 0.164, "step": 2276, "task_loss": 0.17406190931797028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7611271393092511, "compression_loss": 0.0, "distillation_loss": 0.09783841669559479, "epoch": 2.16, "learning_rate": 3.9148957717405596e-05, "loss": 0.0942, "step": 2277, "task_loss": 0.061522744596004486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7611461047637991, "compression_loss": 0.0, "distillation_loss": 0.1405646800994873, "epoch": 2.16, "learning_rate": 3.914017188716347e-05, "loss": 0.1331, "step": 2278, "task_loss": 0.06549764424562454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7611650640487132, "compression_loss": 0.0, "distillation_loss": 0.09124282747507095, "epoch": 2.16, "learning_rate": 3.913138348829102e-05, "loss": 0.0917, "step": 2279, "task_loss": 0.09613334387540817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7611840171649973, "compression_loss": 0.0, "distillation_loss": 0.1638939529657364, "epoch": 2.17, "learning_rate": 3.91225925223847e-05, "loss": 0.161, "step": 2280, "task_loss": 0.13536801934242249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7612029641136548, "compression_loss": 0.0, "distillation_loss": 0.0714389756321907, "epoch": 2.17, "learning_rate": 3.911379899104144e-05, "loss": 0.0746, "step": 2281, "task_loss": 0.10342606157064438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7612219048956895, "compression_loss": 0.0, "distillation_loss": 0.1365533024072647, "epoch": 2.17, "learning_rate": 3.910500289585862e-05, "loss": 0.142, "step": 2282, "task_loss": 0.19071495532989502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7612408395121052, "compression_loss": 0.0, "distillation_loss": 0.04966387897729874, "epoch": 2.17, "learning_rate": 3.90962042384341e-05, "loss": 0.0503, "step": 2283, "task_loss": 0.05590268224477768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7612597679639055, "compression_loss": 0.0, "distillation_loss": 0.03692065179347992, "epoch": 2.17, "learning_rate": 3.908740302036618e-05, "loss": 0.0443, "step": 2284, "task_loss": 0.110334612429142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761278690252094, "compression_loss": 0.0, "distillation_loss": 0.10264438390731812, "epoch": 2.17, "learning_rate": 3.907859924325366e-05, "loss": 0.1084, "step": 2285, "task_loss": 0.16008785367012024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7612976063776745, "compression_loss": 0.0, "distillation_loss": 0.11101731657981873, "epoch": 2.17, "learning_rate": 3.906979290869578e-05, "loss": 0.105, "step": 2286, "task_loss": 0.05065052583813667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7613165163416505, "compression_loss": 0.0, "distillation_loss": 0.14465484023094177, "epoch": 2.17, "learning_rate": 3.9060984018292267e-05, "loss": 0.1474, "step": 2287, "task_loss": 0.17257431149482727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761335420145026, "compression_loss": 0.0, "distillation_loss": 0.30154234170913696, "epoch": 2.17, "learning_rate": 3.905217257364328e-05, "loss": 0.2859, "step": 2288, "task_loss": 0.14557313919067383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7613543177888044, "compression_loss": 0.0, "distillation_loss": 0.09943203628063202, "epoch": 2.17, "learning_rate": 3.904335857634948e-05, "loss": 0.1025, "step": 2289, "task_loss": 0.12985821068286896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7613732092739895, "compression_loss": 0.0, "distillation_loss": 0.08901150524616241, "epoch": 2.17, "learning_rate": 3.9034542028011944e-05, "loss": 0.0838, "step": 2290, "task_loss": 0.03706664592027664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761392094601585, "compression_loss": 0.0, "distillation_loss": 0.01898990385234356, "epoch": 2.18, "learning_rate": 3.902572293023227e-05, "loss": 0.0269, "step": 2291, "task_loss": 0.09843745082616806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7614109737725945, "compression_loss": 0.0, "distillation_loss": 0.04945681244134903, "epoch": 2.18, "learning_rate": 3.9016901284612474e-05, "loss": 0.046, "step": 2292, "task_loss": 0.015036612749099731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7614298467880216, "compression_loss": 0.0, "distillation_loss": 0.07033106684684753, "epoch": 2.18, "learning_rate": 3.9008077092755055e-05, "loss": 0.0685, "step": 2293, "task_loss": 0.05228840187191963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7614487136488702, "compression_loss": 0.0, "distillation_loss": 0.02366098389029503, "epoch": 2.18, "learning_rate": 3.899925035626296e-05, "loss": 0.0333, "step": 2294, "task_loss": 0.1205047145485878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761467574356144, "compression_loss": 0.0, "distillation_loss": 0.11169403791427612, "epoch": 2.18, "learning_rate": 3.899042107673962e-05, "loss": 0.1119, "step": 2295, "task_loss": 0.11369898915290833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7614864289108464, "compression_loss": 0.0, "distillation_loss": 0.039377085864543915, "epoch": 2.18, "learning_rate": 3.898158925578893e-05, "loss": 0.0445, "step": 2296, "task_loss": 0.09109679609537125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7615052773139814, "compression_loss": 0.0, "distillation_loss": 0.32753410935401917, "epoch": 2.18, "learning_rate": 3.89727548950152e-05, "loss": 0.3187, "step": 2297, "task_loss": 0.23894454538822174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7615241195665524, "compression_loss": 0.0, "distillation_loss": 0.13404498994350433, "epoch": 2.18, "learning_rate": 3.8963917996023245e-05, "loss": 0.1353, "step": 2298, "task_loss": 0.14633318781852722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7615429556695633, "compression_loss": 0.0, "distillation_loss": 0.11489589512348175, "epoch": 2.18, "learning_rate": 3.8955078560418345e-05, "loss": 0.1183, "step": 2299, "task_loss": 0.14893510937690735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7615617856240177, "compression_loss": 0.0, "distillation_loss": 0.02402464672923088, "epoch": 2.18, "learning_rate": 3.894623658980622e-05, "loss": 0.0309, "step": 2300, "task_loss": 0.09274609386920929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7615806094309192, "compression_loss": 0.0, "distillation_loss": 0.12553593516349792, "epoch": 2.19, "learning_rate": 3.8937392085793036e-05, "loss": 0.1179, "step": 2301, "task_loss": 0.04914803430438042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7615994270912717, "compression_loss": 0.0, "distillation_loss": 0.03810520097613335, "epoch": 2.19, "learning_rate": 3.892854504998546e-05, "loss": 0.0352, "step": 2302, "task_loss": 0.008859049528837204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7616182386060786, "compression_loss": 0.0, "distillation_loss": 0.10612446069717407, "epoch": 2.19, "learning_rate": 3.891969548399061e-05, "loss": 0.1083, "step": 2303, "task_loss": 0.12744131684303284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7616370439763438, "compression_loss": 0.0, "distillation_loss": 0.08019978553056717, "epoch": 2.19, "learning_rate": 3.891084338941603e-05, "loss": 0.0734, "step": 2304, "task_loss": 0.012498440220952034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7616558432030709, "compression_loss": 0.0, "distillation_loss": 0.05349978804588318, "epoch": 2.19, "learning_rate": 3.8901988767869744e-05, "loss": 0.0752, "step": 2305, "task_loss": 0.2700839042663574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7616746362872636, "compression_loss": 0.0, "distillation_loss": 0.052905574440956116, "epoch": 2.19, "learning_rate": 3.8893131620960254e-05, "loss": 0.0546, "step": 2306, "task_loss": 0.06945595890283585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7616934232299255, "compression_loss": 0.0, "distillation_loss": 0.13802888989448547, "epoch": 2.19, "learning_rate": 3.88842719502965e-05, "loss": 0.1321, "step": 2307, "task_loss": 0.07853611558675766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7617122040320605, "compression_loss": 0.0, "distillation_loss": 0.06617145985364914, "epoch": 2.19, "learning_rate": 3.887540975748787e-05, "loss": 0.0631, "step": 2308, "task_loss": 0.03529322147369385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761730978694672, "compression_loss": 0.0, "distillation_loss": 0.14270713925361633, "epoch": 2.19, "learning_rate": 3.8866545044144234e-05, "loss": 0.1358, "step": 2309, "task_loss": 0.07395662367343903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7617497472187639, "compression_loss": 0.0, "distillation_loss": 0.12063492834568024, "epoch": 2.19, "learning_rate": 3.885767781187591e-05, "loss": 0.1182, "step": 2310, "task_loss": 0.09611006081104279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7617685096053398, "compression_loss": 0.0, "distillation_loss": 0.15884768962860107, "epoch": 2.19, "learning_rate": 3.884880806229367e-05, "loss": 0.1541, "step": 2311, "task_loss": 0.1109381914138794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7617872658554034, "compression_loss": 0.0, "distillation_loss": 0.2637111246585846, "epoch": 2.2, "learning_rate": 3.883993579700875e-05, "loss": 0.2475, "step": 2312, "task_loss": 0.10145562142133713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7618060159699582, "compression_loss": 0.0, "distillation_loss": 0.09027013182640076, "epoch": 2.2, "learning_rate": 3.883106101763285e-05, "loss": 0.0899, "step": 2313, "task_loss": 0.08697813749313354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7618247599500082, "compression_loss": 0.0, "distillation_loss": 0.10050283372402191, "epoch": 2.2, "learning_rate": 3.882218372577809e-05, "loss": 0.0967, "step": 2314, "task_loss": 0.062062062323093414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7618434977965569, "compression_loss": 0.0, "distillation_loss": 0.21101830899715424, "epoch": 2.2, "learning_rate": 3.881330392305709e-05, "loss": 0.2235, "step": 2315, "task_loss": 0.3360675871372223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.761862229510608, "compression_loss": 0.0, "distillation_loss": 0.20589599013328552, "epoch": 2.2, "learning_rate": 3.8804421611082916e-05, "loss": 0.1933, "step": 2316, "task_loss": 0.07991321384906769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7618809550931652, "compression_loss": 0.0, "distillation_loss": 0.1543605923652649, "epoch": 2.2, "learning_rate": 3.8795536791469066e-05, "loss": 0.1454, "step": 2317, "task_loss": 0.06446463614702225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7618996745452322, "compression_loss": 0.0, "distillation_loss": 0.09525361657142639, "epoch": 2.2, "learning_rate": 3.8786649465829516e-05, "loss": 0.0921, "step": 2318, "task_loss": 0.0634106770157814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7619183878678126, "compression_loss": 0.0, "distillation_loss": 0.041672758758068085, "epoch": 2.2, "learning_rate": 3.8777759635778696e-05, "loss": 0.0385, "step": 2319, "task_loss": 0.009624077007174492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7619370950619102, "compression_loss": 0.0, "distillation_loss": 0.13709776103496552, "epoch": 2.2, "learning_rate": 3.876886730293149e-05, "loss": 0.1341, "step": 2320, "task_loss": 0.10736387968063354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7619557961285286, "compression_loss": 0.0, "distillation_loss": 0.0763949453830719, "epoch": 2.2, "learning_rate": 3.8759972468903215e-05, "loss": 0.0731, "step": 2321, "task_loss": 0.04355807602405548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7619744910686715, "compression_loss": 0.0, "distillation_loss": 0.08063163608312607, "epoch": 2.21, "learning_rate": 3.875107513530968e-05, "loss": 0.0841, "step": 2322, "task_loss": 0.11514770984649658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7619931798833426, "compression_loss": 0.0, "distillation_loss": 0.05136069282889366, "epoch": 2.21, "learning_rate": 3.874217530376711e-05, "loss": 0.0527, "step": 2323, "task_loss": 0.06518974155187607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7620118625735456, "compression_loss": 0.0, "distillation_loss": 0.045473020523786545, "epoch": 2.21, "learning_rate": 3.873327297589223e-05, "loss": 0.0417, "step": 2324, "task_loss": 0.007644519209861755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7620305391402841, "compression_loss": 0.0, "distillation_loss": 0.033313095569610596, "epoch": 2.21, "learning_rate": 3.8724368153302166e-05, "loss": 0.0488, "step": 2325, "task_loss": 0.18855777382850647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7620492095845618, "compression_loss": 0.0, "distillation_loss": 0.23782016336917877, "epoch": 2.21, "learning_rate": 3.871546083761453e-05, "loss": 0.2333, "step": 2326, "task_loss": 0.19272853434085846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7620678739073825, "compression_loss": 0.0, "distillation_loss": 0.10806293785572052, "epoch": 2.21, "learning_rate": 3.870655103044738e-05, "loss": 0.1051, "step": 2327, "task_loss": 0.07805277407169342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7620865321097497, "compression_loss": 0.0, "distillation_loss": 0.1450902819633484, "epoch": 2.21, "learning_rate": 3.8697638733419216e-05, "loss": 0.1578, "step": 2328, "task_loss": 0.2723790109157562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7621051841926673, "compression_loss": 0.0, "distillation_loss": 0.09044960141181946, "epoch": 2.21, "learning_rate": 3.8688723948149014e-05, "loss": 0.0879, "step": 2329, "task_loss": 0.0644555538892746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7621238301571387, "compression_loss": 0.0, "distillation_loss": 0.036163873970508575, "epoch": 2.21, "learning_rate": 3.867980667625618e-05, "loss": 0.0502, "step": 2330, "task_loss": 0.17629528045654297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7621424700041679, "compression_loss": 0.0, "distillation_loss": 0.08255277574062347, "epoch": 2.21, "learning_rate": 3.867088691936058e-05, "loss": 0.0778, "step": 2331, "task_loss": 0.03511197119951248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7621611037347583, "compression_loss": 0.0, "distillation_loss": 0.1695278137922287, "epoch": 2.21, "learning_rate": 3.8661964679082535e-05, "loss": 0.1618, "step": 2332, "task_loss": 0.09199361503124237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7621797313499138, "compression_loss": 0.0, "distillation_loss": 0.052597202360630035, "epoch": 2.22, "learning_rate": 3.8653039957042806e-05, "loss": 0.0562, "step": 2333, "task_loss": 0.08904554694890976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.762198352850638, "compression_loss": 0.0, "distillation_loss": 0.154799684882164, "epoch": 2.22, "learning_rate": 3.8644112754862614e-05, "loss": 0.1498, "step": 2334, "task_loss": 0.10432252287864685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7622169682379345, "compression_loss": 0.0, "distillation_loss": 0.06855852156877518, "epoch": 2.22, "learning_rate": 3.8635183074163636e-05, "loss": 0.0654, "step": 2335, "task_loss": 0.03740303963422775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7622355775128071, "compression_loss": 0.0, "distillation_loss": 0.031943485140800476, "epoch": 2.22, "learning_rate": 3.862625091656797e-05, "loss": 0.039, "step": 2336, "task_loss": 0.1027441918849945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7622541806762594, "compression_loss": 0.0, "distillation_loss": 0.08677913248538971, "epoch": 2.22, "learning_rate": 3.861731628369822e-05, "loss": 0.0888, "step": 2337, "task_loss": 0.10726401954889297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7622727777292951, "compression_loss": 0.0, "distillation_loss": 0.03856203332543373, "epoch": 2.22, "learning_rate": 3.8608379177177375e-05, "loss": 0.0354, "step": 2338, "task_loss": 0.0071256812661886215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7622913686729179, "compression_loss": 0.0, "distillation_loss": 0.04021540284156799, "epoch": 2.22, "learning_rate": 3.8599439598628916e-05, "loss": 0.0368, "step": 2339, "task_loss": 0.005587218329310417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7623099535081316, "compression_loss": 0.0, "distillation_loss": 0.035530779510736465, "epoch": 2.22, "learning_rate": 3.8590497549676753e-05, "loss": 0.0384, "step": 2340, "task_loss": 0.06408775597810745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7623285322359397, "compression_loss": 0.0, "distillation_loss": 0.16440139710903168, "epoch": 2.22, "learning_rate": 3.858155303194526e-05, "loss": 0.1682, "step": 2341, "task_loss": 0.20209884643554688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7623471048573459, "compression_loss": 0.0, "distillation_loss": 0.09271198511123657, "epoch": 2.22, "learning_rate": 3.8572606047059254e-05, "loss": 0.0869, "step": 2342, "task_loss": 0.03472534567117691 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.762365671373354, "compression_loss": 0.0, "distillation_loss": 0.22541561722755432, "epoch": 2.23, "learning_rate": 3.856365659664399e-05, "loss": 0.2111, "step": 2343, "task_loss": 0.08197568356990814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7623842317849675, "compression_loss": 0.0, "distillation_loss": 0.1988585889339447, "epoch": 2.23, "learning_rate": 3.855470468232518e-05, "loss": 0.2026, "step": 2344, "task_loss": 0.23670420050621033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7624027860931902, "compression_loss": 0.0, "distillation_loss": 0.08545896410942078, "epoch": 2.23, "learning_rate": 3.854575030572898e-05, "loss": 0.0809, "step": 2345, "task_loss": 0.040093451738357544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7624213342990259, "compression_loss": 0.0, "distillation_loss": 0.11442442238330841, "epoch": 2.23, "learning_rate": 3.853679346848201e-05, "loss": 0.1172, "step": 2346, "task_loss": 0.14235654473304749 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7624398764034781, "compression_loss": 0.0, "distillation_loss": 0.08486449718475342, "epoch": 2.23, "learning_rate": 3.8527834172211306e-05, "loss": 0.0992, "step": 2347, "task_loss": 0.22856228053569794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7624584124075505, "compression_loss": 0.0, "distillation_loss": 0.13581222295761108, "epoch": 2.23, "learning_rate": 3.851887241854438e-05, "loss": 0.129, "step": 2348, "task_loss": 0.06761342287063599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7624769423122468, "compression_loss": 0.0, "distillation_loss": 0.3619718551635742, "epoch": 2.23, "learning_rate": 3.850990820910917e-05, "loss": 0.3528, "step": 2349, "task_loss": 0.27037566900253296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7624954661185708, "compression_loss": 0.0, "distillation_loss": 0.1374235451221466, "epoch": 2.23, "learning_rate": 3.8500941545534065e-05, "loss": 0.137, "step": 2350, "task_loss": 0.13285070657730103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.762513983827526, "compression_loss": 0.0, "distillation_loss": 0.03058495558798313, "epoch": 2.23, "learning_rate": 3.849197242944791e-05, "loss": 0.0281, "step": 2351, "task_loss": 0.0058120060712099075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7625324954401163, "compression_loss": 0.0, "distillation_loss": 0.26459288597106934, "epoch": 2.23, "learning_rate": 3.8483000862479986e-05, "loss": 0.2585, "step": 2352, "task_loss": 0.2032066434621811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7625510009573451, "compression_loss": 0.0, "distillation_loss": 0.11497370898723602, "epoch": 2.23, "learning_rate": 3.8474026846260015e-05, "loss": 0.1074, "step": 2353, "task_loss": 0.039116568863391876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7625695003802162, "compression_loss": 0.0, "distillation_loss": 0.10177917033433914, "epoch": 2.24, "learning_rate": 3.846505038241818e-05, "loss": 0.098, "step": 2354, "task_loss": 0.0641433447599411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7625879937097334, "compression_loss": 0.0, "distillation_loss": 0.1023842841386795, "epoch": 2.24, "learning_rate": 3.84560714725851e-05, "loss": 0.1054, "step": 2355, "task_loss": 0.13256244361400604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7626064809469003, "compression_loss": 0.0, "distillation_loss": 0.21765132248401642, "epoch": 2.24, "learning_rate": 3.8447090118391814e-05, "loss": 0.2109, "step": 2356, "task_loss": 0.14984160661697388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7626249620927206, "compression_loss": 0.0, "distillation_loss": 0.10686811059713364, "epoch": 2.24, "learning_rate": 3.8438106321469864e-05, "loss": 0.1046, "step": 2357, "task_loss": 0.08399415016174316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7626434371481979, "compression_loss": 0.0, "distillation_loss": 0.06657497584819794, "epoch": 2.24, "learning_rate": 3.842912008345117e-05, "loss": 0.0629, "step": 2358, "task_loss": 0.029489833861589432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.762661906114336, "compression_loss": 0.0, "distillation_loss": 0.17578309774398804, "epoch": 2.24, "learning_rate": 3.842013140596815e-05, "loss": 0.1783, "step": 2359, "task_loss": 0.20061953365802765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7626803689921385, "compression_loss": 0.0, "distillation_loss": 0.08999986201524734, "epoch": 2.24, "learning_rate": 3.841114029065362e-05, "loss": 0.1037, "step": 2360, "task_loss": 0.22721917927265167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7626988257826091, "compression_loss": 0.0, "distillation_loss": 0.18849384784698486, "epoch": 2.24, "learning_rate": 3.8402146739140874e-05, "loss": 0.1817, "step": 2361, "task_loss": 0.12036500871181488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7627172764867515, "compression_loss": 0.0, "distillation_loss": 0.03334838151931763, "epoch": 2.24, "learning_rate": 3.8393150753063614e-05, "loss": 0.0326, "step": 2362, "task_loss": 0.025991789996623993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7627357211055693, "compression_loss": 0.0, "distillation_loss": 0.09625150263309479, "epoch": 2.24, "learning_rate": 3.838415233405603e-05, "loss": 0.1046, "step": 2363, "task_loss": 0.17937156558036804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7627541596400663, "compression_loss": 0.0, "distillation_loss": 0.07551825791597366, "epoch": 2.25, "learning_rate": 3.837515148375271e-05, "loss": 0.0853, "step": 2364, "task_loss": 0.17311903834342957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7627725920912461, "compression_loss": 0.0, "distillation_loss": 0.06278804689645767, "epoch": 2.25, "learning_rate": 3.836614820378871e-05, "loss": 0.0712, "step": 2365, "task_loss": 0.14707757532596588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7627910184601125, "compression_loss": 0.0, "distillation_loss": 0.045216597616672516, "epoch": 2.25, "learning_rate": 3.835714249579952e-05, "loss": 0.0581, "step": 2366, "task_loss": 0.1742611825466156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.762809438747669, "compression_loss": 0.0, "distillation_loss": 0.28981393575668335, "epoch": 2.25, "learning_rate": 3.8348134361421064e-05, "loss": 0.2989, "step": 2367, "task_loss": 0.38077259063720703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7628278529549195, "compression_loss": 0.0, "distillation_loss": 0.0407429076731205, "epoch": 2.25, "learning_rate": 3.8339123802289716e-05, "loss": 0.037, "step": 2368, "task_loss": 0.0035578403621912003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7628462610828675, "compression_loss": 0.0, "distillation_loss": 0.08070795238018036, "epoch": 2.25, "learning_rate": 3.8330110820042285e-05, "loss": 0.0862, "step": 2369, "task_loss": 0.1353619545698166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7628646631325168, "compression_loss": 0.0, "distillation_loss": 0.05324525758624077, "epoch": 2.25, "learning_rate": 3.8321095416316024e-05, "loss": 0.0695, "step": 2370, "task_loss": 0.21613724529743195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.762883059104871, "compression_loss": 0.0, "distillation_loss": 0.04425017535686493, "epoch": 2.25, "learning_rate": 3.831207759274863e-05, "loss": 0.0407, "step": 2371, "task_loss": 0.008313026279211044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7629014490009337, "compression_loss": 0.0, "distillation_loss": 0.07859969139099121, "epoch": 2.25, "learning_rate": 3.8303057350978224e-05, "loss": 0.1042, "step": 2372, "task_loss": 0.3349103033542633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7629198328217088, "compression_loss": 0.0, "distillation_loss": 0.09541267156600952, "epoch": 2.25, "learning_rate": 3.829403469264339e-05, "loss": 0.0935, "step": 2373, "task_loss": 0.07653743773698807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7629382105681998, "compression_loss": 0.0, "distillation_loss": 0.10529537498950958, "epoch": 2.25, "learning_rate": 3.828500961938313e-05, "loss": 0.0988, "step": 2374, "task_loss": 0.040317974984645844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7629565822414105, "compression_loss": 0.0, "distillation_loss": 0.050773873925209045, "epoch": 2.26, "learning_rate": 3.827598213283688e-05, "loss": 0.0477, "step": 2375, "task_loss": 0.019673151895403862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7629749478423445, "compression_loss": 0.0, "distillation_loss": 0.06847432255744934, "epoch": 2.26, "learning_rate": 3.8266952234644545e-05, "loss": 0.0684, "step": 2376, "task_loss": 0.06723335385322571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7629933073720057, "compression_loss": 0.0, "distillation_loss": 0.1094050258398056, "epoch": 2.26, "learning_rate": 3.825791992644644e-05, "loss": 0.1155, "step": 2377, "task_loss": 0.17028164863586426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7630116608313974, "compression_loss": 0.0, "distillation_loss": 0.04125671088695526, "epoch": 2.26, "learning_rate": 3.824888520988333e-05, "loss": 0.038, "step": 2378, "task_loss": 0.008307870477437973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7630300082215236, "compression_loss": 0.0, "distillation_loss": 0.0585186630487442, "epoch": 2.26, "learning_rate": 3.823984808659641e-05, "loss": 0.0588, "step": 2379, "task_loss": 0.06130526214838028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7630483495433877, "compression_loss": 0.0, "distillation_loss": 0.03627898544073105, "epoch": 2.26, "learning_rate": 3.8230808558227335e-05, "loss": 0.0331, "step": 2380, "task_loss": 0.004686055704951286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7630666847979937, "compression_loss": 0.0, "distillation_loss": 0.04550597444176674, "epoch": 2.26, "learning_rate": 3.8221766626418155e-05, "loss": 0.0478, "step": 2381, "task_loss": 0.06875459849834442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.763085013986345, "compression_loss": 0.0, "distillation_loss": 0.057246215641498566, "epoch": 2.26, "learning_rate": 3.821272229281139e-05, "loss": 0.0576, "step": 2382, "task_loss": 0.06067638099193573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7631033371094456, "compression_loss": 0.0, "distillation_loss": 0.17733870446681976, "epoch": 2.26, "learning_rate": 3.820367555904999e-05, "loss": 0.1736, "step": 2383, "task_loss": 0.13981691002845764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7631216541682988, "compression_loss": 0.0, "distillation_loss": 0.14336426556110382, "epoch": 2.26, "learning_rate": 3.819462642677733e-05, "loss": 0.1475, "step": 2384, "task_loss": 0.184513121843338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7631399651639086, "compression_loss": 0.0, "distillation_loss": 0.1907598078250885, "epoch": 2.26, "learning_rate": 3.818557489763724e-05, "loss": 0.1909, "step": 2385, "task_loss": 0.19197434186935425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7631582700972785, "compression_loss": 0.0, "distillation_loss": 0.09918617457151413, "epoch": 2.27, "learning_rate": 3.817652097327396e-05, "loss": 0.0916, "step": 2386, "task_loss": 0.023263249546289444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7631765689694123, "compression_loss": 0.0, "distillation_loss": 0.0876680240035057, "epoch": 2.27, "learning_rate": 3.81674646553322e-05, "loss": 0.0851, "step": 2387, "task_loss": 0.06229857727885246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7631948617813136, "compression_loss": 0.0, "distillation_loss": 0.09220877289772034, "epoch": 2.27, "learning_rate": 3.815840594545706e-05, "loss": 0.0941, "step": 2388, "task_loss": 0.11108911037445068 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.763213148533986, "compression_loss": 0.0, "distillation_loss": 0.10177421569824219, "epoch": 2.27, "learning_rate": 3.814934484529411e-05, "loss": 0.1073, "step": 2389, "task_loss": 0.157059907913208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7632314292284335, "compression_loss": 0.0, "distillation_loss": 0.05992235243320465, "epoch": 2.27, "learning_rate": 3.8140281356489346e-05, "loss": 0.0608, "step": 2390, "task_loss": 0.06858600676059723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7632497038656594, "compression_loss": 0.0, "distillation_loss": 0.0574650838971138, "epoch": 2.27, "learning_rate": 3.8131215480689184e-05, "loss": 0.0586, "step": 2391, "task_loss": 0.06853343546390533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7632679724466676, "compression_loss": 0.0, "distillation_loss": 0.21620208024978638, "epoch": 2.27, "learning_rate": 3.812214721954049e-05, "loss": 0.2103, "step": 2392, "task_loss": 0.15719348192214966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7632862349724616, "compression_loss": 0.0, "distillation_loss": 0.17695803940296173, "epoch": 2.27, "learning_rate": 3.811307657469055e-05, "loss": 0.1758, "step": 2393, "task_loss": 0.16526271402835846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7633044914440454, "compression_loss": 0.0, "distillation_loss": 0.05041082203388214, "epoch": 2.27, "learning_rate": 3.8104003547787105e-05, "loss": 0.0526, "step": 2394, "task_loss": 0.07256819307804108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7633227418624224, "compression_loss": 0.0, "distillation_loss": 0.03695048391819, "epoch": 2.27, "learning_rate": 3.809492814047831e-05, "loss": 0.0389, "step": 2395, "task_loss": 0.05642509087920189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7633409862285965, "compression_loss": 0.0, "distillation_loss": 0.1277005672454834, "epoch": 2.28, "learning_rate": 3.8085850354412745e-05, "loss": 0.1204, "step": 2396, "task_loss": 0.05449753627181053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7633592245435712, "compression_loss": 0.0, "distillation_loss": 0.1139630377292633, "epoch": 2.28, "learning_rate": 3.807677019123944e-05, "loss": 0.1069, "step": 2397, "task_loss": 0.042952507734298706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7633774568083502, "compression_loss": 0.0, "distillation_loss": 0.24451753497123718, "epoch": 2.28, "learning_rate": 3.806768765260785e-05, "loss": 0.2416, "step": 2398, "task_loss": 0.2154046595096588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7633956830239372, "compression_loss": 0.0, "distillation_loss": 0.20844148099422455, "epoch": 2.28, "learning_rate": 3.805860274016787e-05, "loss": 0.2083, "step": 2399, "task_loss": 0.20691928267478943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.763413903191336, "compression_loss": 0.0, "distillation_loss": 0.3789367079734802, "epoch": 2.28, "learning_rate": 3.8049515455569816e-05, "loss": 0.3638, "step": 2400, "task_loss": 0.22742155194282532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7634321173115501, "compression_loss": 0.0, "distillation_loss": 0.18300700187683105, "epoch": 2.28, "learning_rate": 3.804042580046442e-05, "loss": 0.1727, "step": 2401, "task_loss": 0.08020967990159988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7634503253855833, "compression_loss": 0.0, "distillation_loss": 0.14992451667785645, "epoch": 2.28, "learning_rate": 3.803133377650288e-05, "loss": 0.1407, "step": 2402, "task_loss": 0.05811336264014244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7634685274144393, "compression_loss": 0.0, "distillation_loss": 0.20229187607765198, "epoch": 2.28, "learning_rate": 3.80222393853368e-05, "loss": 0.1983, "step": 2403, "task_loss": 0.16219428181648254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7634867233991217, "compression_loss": 0.0, "distillation_loss": 0.12786564230918884, "epoch": 2.28, "learning_rate": 3.801314262861822e-05, "loss": 0.1255, "step": 2404, "task_loss": 0.10410147160291672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7635049133406342, "compression_loss": 0.0, "distillation_loss": 0.23157238960266113, "epoch": 2.28, "learning_rate": 3.800404350799961e-05, "loss": 0.2361, "step": 2405, "task_loss": 0.27698665857315063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7635230972399806, "compression_loss": 0.0, "distillation_loss": 0.06388011574745178, "epoch": 2.28, "learning_rate": 3.799494202513386e-05, "loss": 0.0808, "step": 2406, "task_loss": 0.23331348598003387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7635412750981644, "compression_loss": 0.0, "distillation_loss": 0.11542148143053055, "epoch": 2.29, "learning_rate": 3.798583818167432e-05, "loss": 0.1163, "step": 2407, "task_loss": 0.12456656247377396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7635594469161894, "compression_loss": 0.0, "distillation_loss": 0.08499371260404587, "epoch": 2.29, "learning_rate": 3.797673197927473e-05, "loss": 0.0945, "step": 2408, "task_loss": 0.17979812622070312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7635776126950591, "compression_loss": 0.0, "distillation_loss": 0.1530931293964386, "epoch": 2.29, "learning_rate": 3.796762341958927e-05, "loss": 0.1582, "step": 2409, "task_loss": 0.20382657647132874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7635957724357775, "compression_loss": 0.0, "distillation_loss": 0.07288970053195953, "epoch": 2.29, "learning_rate": 3.795851250427257e-05, "loss": 0.074, "step": 2410, "task_loss": 0.08438847959041595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.763613926139348, "compression_loss": 0.0, "distillation_loss": 0.23029862344264984, "epoch": 2.29, "learning_rate": 3.794939923497967e-05, "loss": 0.2204, "step": 2411, "task_loss": 0.13161104917526245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7636320738067744, "compression_loss": 0.0, "distillation_loss": 0.2242552489042282, "epoch": 2.29, "learning_rate": 3.794028361336603e-05, "loss": 0.2215, "step": 2412, "task_loss": 0.1969766765832901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7636502154390605, "compression_loss": 0.0, "distillation_loss": 0.13051095604896545, "epoch": 2.29, "learning_rate": 3.793116564108754e-05, "loss": 0.122, "step": 2413, "task_loss": 0.04499632865190506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7636683510372098, "compression_loss": 0.0, "distillation_loss": 0.03876876085996628, "epoch": 2.29, "learning_rate": 3.7922045319800545e-05, "loss": 0.0358, "step": 2414, "task_loss": 0.008744785562157631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.763686480602226, "compression_loss": 0.0, "distillation_loss": 0.05910433828830719, "epoch": 2.29, "learning_rate": 3.7912922651161783e-05, "loss": 0.0699, "step": 2415, "task_loss": 0.16721457242965698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7637046041351129, "compression_loss": 0.0, "distillation_loss": 0.037029679864645004, "epoch": 2.29, "learning_rate": 3.790379763682844e-05, "loss": 0.0346, "step": 2416, "task_loss": 0.01284867525100708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7637227216368739, "compression_loss": 0.0, "distillation_loss": 0.25016137957572937, "epoch": 2.3, "learning_rate": 3.7894670278458096e-05, "loss": 0.2403, "step": 2417, "task_loss": 0.15121634304523468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7637408331085132, "compression_loss": 0.0, "distillation_loss": 0.033061422407627106, "epoch": 2.3, "learning_rate": 3.7885540577708804e-05, "loss": 0.036, "step": 2418, "task_loss": 0.06259030848741531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7637589385510339, "compression_loss": 0.0, "distillation_loss": 0.027268044650554657, "epoch": 2.3, "learning_rate": 3.7876408536239006e-05, "loss": 0.0253, "step": 2419, "task_loss": 0.007266120985150337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76377703796544, "compression_loss": 0.0, "distillation_loss": 0.14146488904953003, "epoch": 2.3, "learning_rate": 3.7867274155707585e-05, "loss": 0.1379, "step": 2420, "task_loss": 0.10578147321939468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7637951313527352, "compression_loss": 0.0, "distillation_loss": 0.25725674629211426, "epoch": 2.3, "learning_rate": 3.7858137437773845e-05, "loss": 0.2562, "step": 2421, "task_loss": 0.2463308572769165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.763813218713923, "compression_loss": 0.0, "distillation_loss": 0.15272045135498047, "epoch": 2.3, "learning_rate": 3.784899838409751e-05, "loss": 0.1586, "step": 2422, "task_loss": 0.2110539674758911 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7638313000500074, "compression_loss": 0.0, "distillation_loss": 0.040354013442993164, "epoch": 2.3, "learning_rate": 3.783985699633874e-05, "loss": 0.0399, "step": 2423, "task_loss": 0.03630523383617401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7638493753619917, "compression_loss": 0.0, "distillation_loss": 0.1408417522907257, "epoch": 2.3, "learning_rate": 3.783071327615811e-05, "loss": 0.1307, "step": 2424, "task_loss": 0.03991064056754112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7638674446508799, "compression_loss": 0.0, "distillation_loss": 0.0335991308093071, "epoch": 2.3, "learning_rate": 3.7821567225216615e-05, "loss": 0.0307, "step": 2425, "task_loss": 0.004980664700269699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7638855079176754, "compression_loss": 0.0, "distillation_loss": 0.13876627385616302, "epoch": 2.3, "learning_rate": 3.781241884517569e-05, "loss": 0.1474, "step": 2426, "task_loss": 0.2252703756093979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7639035651633821, "compression_loss": 0.0, "distillation_loss": 0.18450605869293213, "epoch": 2.3, "learning_rate": 3.780326813769717e-05, "loss": 0.1689, "step": 2427, "task_loss": 0.028058160096406937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7639216163890036, "compression_loss": 0.0, "distillation_loss": 0.42347198724746704, "epoch": 2.31, "learning_rate": 3.779411510444334e-05, "loss": 0.4095, "step": 2428, "task_loss": 0.2832689583301544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7639396615955436, "compression_loss": 0.0, "distillation_loss": 0.09357698261737823, "epoch": 2.31, "learning_rate": 3.778495974707688e-05, "loss": 0.0918, "step": 2429, "task_loss": 0.07574363052845001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7639577007840057, "compression_loss": 0.0, "distillation_loss": 0.15994183719158173, "epoch": 2.31, "learning_rate": 3.7775802067260905e-05, "loss": 0.1569, "step": 2430, "task_loss": 0.1296941488981247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7639757339553938, "compression_loss": 0.0, "distillation_loss": 0.078113853931427, "epoch": 2.31, "learning_rate": 3.776664206665896e-05, "loss": 0.0896, "step": 2431, "task_loss": 0.19323307275772095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7639937611107113, "compression_loss": 0.0, "distillation_loss": 0.028910305351018906, "epoch": 2.31, "learning_rate": 3.7757479746935e-05, "loss": 0.0324, "step": 2432, "task_loss": 0.06403736025094986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.764011782250962, "compression_loss": 0.0, "distillation_loss": 0.03515395522117615, "epoch": 2.31, "learning_rate": 3.77483151097534e-05, "loss": 0.0325, "step": 2433, "task_loss": 0.008116895332932472 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7640297973771497, "compression_loss": 0.0, "distillation_loss": 0.07505205273628235, "epoch": 2.31, "learning_rate": 3.773914815677897e-05, "loss": 0.0719, "step": 2434, "task_loss": 0.04383193701505661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7640478064902779, "compression_loss": 0.0, "distillation_loss": 0.204584002494812, "epoch": 2.31, "learning_rate": 3.7729978889676915e-05, "loss": 0.1907, "step": 2435, "task_loss": 0.06581425666809082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7640658095913504, "compression_loss": 0.0, "distillation_loss": 0.1545599400997162, "epoch": 2.31, "learning_rate": 3.7720807310112896e-05, "loss": 0.1583, "step": 2436, "task_loss": 0.19169500470161438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7640838066813709, "compression_loss": 0.0, "distillation_loss": 0.10692571848630905, "epoch": 2.31, "learning_rate": 3.7711633419752954e-05, "loss": 0.1141, "step": 2437, "task_loss": 0.17911353707313538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7641017977613429, "compression_loss": 0.0, "distillation_loss": 0.2065647542476654, "epoch": 2.32, "learning_rate": 3.7702457220263595e-05, "loss": 0.206, "step": 2438, "task_loss": 0.20088602602481842 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7641197828322703, "compression_loss": 0.0, "distillation_loss": 0.1430378258228302, "epoch": 2.32, "learning_rate": 3.76932787133117e-05, "loss": 0.1612, "step": 2439, "task_loss": 0.3245554566383362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7641377618951566, "compression_loss": 0.0, "distillation_loss": 0.12717294692993164, "epoch": 2.32, "learning_rate": 3.768409790056459e-05, "loss": 0.1219, "step": 2440, "task_loss": 0.07445275783538818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7641557349510057, "compression_loss": 0.0, "distillation_loss": 0.06758947670459747, "epoch": 2.32, "learning_rate": 3.7674914783690006e-05, "loss": 0.0633, "step": 2441, "task_loss": 0.024319326505064964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7641737020008211, "compression_loss": 0.0, "distillation_loss": 0.0334925502538681, "epoch": 2.32, "learning_rate": 3.7665729364356115e-05, "loss": 0.0313, "step": 2442, "task_loss": 0.011463357135653496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7641916630456065, "compression_loss": 0.0, "distillation_loss": 0.112037792801857, "epoch": 2.32, "learning_rate": 3.7656541644231494e-05, "loss": 0.1136, "step": 2443, "task_loss": 0.12805123627185822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7642096180863657, "compression_loss": 0.0, "distillation_loss": 0.08280123025178909, "epoch": 2.32, "learning_rate": 3.764735162498512e-05, "loss": 0.0867, "step": 2444, "task_loss": 0.1218644231557846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7642275671241022, "compression_loss": 0.0, "distillation_loss": 0.16756752133369446, "epoch": 2.32, "learning_rate": 3.763815930828641e-05, "loss": 0.1726, "step": 2445, "task_loss": 0.21819201111793518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7642455101598198, "compression_loss": 0.0, "distillation_loss": 0.18085908889770508, "epoch": 2.32, "learning_rate": 3.76289646958052e-05, "loss": 0.1723, "step": 2446, "task_loss": 0.09551871567964554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7642634471945222, "compression_loss": 0.0, "distillation_loss": 0.11347385495901108, "epoch": 2.32, "learning_rate": 3.761976778921173e-05, "loss": 0.1107, "step": 2447, "task_loss": 0.08610384166240692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.764281378229213, "compression_loss": 0.0, "distillation_loss": 0.029696688055992126, "epoch": 2.32, "learning_rate": 3.761056859017667e-05, "loss": 0.0276, "step": 2448, "task_loss": 0.009167637676000595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7642993032648961, "compression_loss": 0.0, "distillation_loss": 0.1404319703578949, "epoch": 2.33, "learning_rate": 3.7601367100371085e-05, "loss": 0.1482, "step": 2449, "task_loss": 0.21834945678710938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7643172223025749, "compression_loss": 0.0, "distillation_loss": 0.15198810398578644, "epoch": 2.33, "learning_rate": 3.759216332146649e-05, "loss": 0.1448, "step": 2450, "task_loss": 0.07967476546764374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7643351353432531, "compression_loss": 0.0, "distillation_loss": 0.0767270177602768, "epoch": 2.33, "learning_rate": 3.7582957255134765e-05, "loss": 0.0727, "step": 2451, "task_loss": 0.03611510992050171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7643530423879347, "compression_loss": 0.0, "distillation_loss": 0.06880663335323334, "epoch": 2.33, "learning_rate": 3.7573748903048266e-05, "loss": 0.0728, "step": 2452, "task_loss": 0.10889902710914612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7643709434376229, "compression_loss": 0.0, "distillation_loss": 0.11665643751621246, "epoch": 2.33, "learning_rate": 3.756453826687972e-05, "loss": 0.1273, "step": 2453, "task_loss": 0.2232402116060257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7643888384933218, "compression_loss": 0.0, "distillation_loss": 0.11299058794975281, "epoch": 2.33, "learning_rate": 3.755532534830229e-05, "loss": 0.1148, "step": 2454, "task_loss": 0.13139456510543823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7644067275560349, "compression_loss": 0.0, "distillation_loss": 0.14656969904899597, "epoch": 2.33, "learning_rate": 3.7546110148989535e-05, "loss": 0.1399, "step": 2455, "task_loss": 0.07960043847560883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7644246106267659, "compression_loss": 0.0, "distillation_loss": 0.09302163124084473, "epoch": 2.33, "learning_rate": 3.7536892670615454e-05, "loss": 0.0898, "step": 2456, "task_loss": 0.06115756928920746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7644424877065185, "compression_loss": 0.0, "distillation_loss": 0.27814599871635437, "epoch": 2.33, "learning_rate": 3.752767291485444e-05, "loss": 0.29, "step": 2457, "task_loss": 0.3964260220527649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7644603587962964, "compression_loss": 0.0, "distillation_loss": 0.13266442716121674, "epoch": 2.33, "learning_rate": 3.7518450883381306e-05, "loss": 0.1348, "step": 2458, "task_loss": 0.15446260571479797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7644782238971031, "compression_loss": 0.0, "distillation_loss": 0.24767599999904633, "epoch": 2.34, "learning_rate": 3.750922657787128e-05, "loss": 0.2381, "step": 2459, "task_loss": 0.15177518129348755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7644960830099425, "compression_loss": 0.0, "distillation_loss": 0.044840142130851746, "epoch": 2.34, "learning_rate": 3.7500000000000003e-05, "loss": 0.051, "step": 2460, "task_loss": 0.10653319954872131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7645139361358183, "compression_loss": 0.0, "distillation_loss": 0.039941057562828064, "epoch": 2.34, "learning_rate": 3.7490771151443525e-05, "loss": 0.0537, "step": 2461, "task_loss": 0.17739588022232056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.764531783275734, "compression_loss": 0.0, "distillation_loss": 0.03993482142686844, "epoch": 2.34, "learning_rate": 3.748154003387831e-05, "loss": 0.0367, "step": 2462, "task_loss": 0.00717165507376194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7645496244306934, "compression_loss": 0.0, "distillation_loss": 0.12393603473901749, "epoch": 2.34, "learning_rate": 3.7472306648981235e-05, "loss": 0.1282, "step": 2463, "task_loss": 0.16707316040992737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7645674596017003, "compression_loss": 0.0, "distillation_loss": 0.07649366557598114, "epoch": 2.34, "learning_rate": 3.746307099842959e-05, "loss": 0.0711, "step": 2464, "task_loss": 0.022800806909799576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7645852887897581, "compression_loss": 0.0, "distillation_loss": 0.20021630823612213, "epoch": 2.34, "learning_rate": 3.745383308390108e-05, "loss": 0.2007, "step": 2465, "task_loss": 0.20535027980804443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7646031119958706, "compression_loss": 0.0, "distillation_loss": 0.2035316526889801, "epoch": 2.34, "learning_rate": 3.74445929070738e-05, "loss": 0.196, "step": 2466, "task_loss": 0.1283217966556549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7646209292210415, "compression_loss": 0.0, "distillation_loss": 0.06502929329872131, "epoch": 2.34, "learning_rate": 3.74353504696263e-05, "loss": 0.0669, "step": 2467, "task_loss": 0.08407002687454224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7646387404662746, "compression_loss": 0.0, "distillation_loss": 0.2670043408870697, "epoch": 2.34, "learning_rate": 3.742610577323749e-05, "loss": 0.275, "step": 2468, "task_loss": 0.34709686040878296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7646565457325735, "compression_loss": 0.0, "distillation_loss": 0.06426650285720825, "epoch": 2.34, "learning_rate": 3.7416858819586724e-05, "loss": 0.0684, "step": 2469, "task_loss": 0.10525249689817429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7646743450209417, "compression_loss": 0.0, "distillation_loss": 0.15390363335609436, "epoch": 2.35, "learning_rate": 3.740760961035375e-05, "loss": 0.1644, "step": 2470, "task_loss": 0.258728951215744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7646921383323831, "compression_loss": 0.0, "distillation_loss": 0.040257424116134644, "epoch": 2.35, "learning_rate": 3.739835814721874e-05, "loss": 0.037, "step": 2471, "task_loss": 0.0077125392854213715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7647099256679013, "compression_loss": 0.0, "distillation_loss": 0.12947189807891846, "epoch": 2.35, "learning_rate": 3.738910443186226e-05, "loss": 0.1284, "step": 2472, "task_loss": 0.1188727617263794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7647277070285, "compression_loss": 0.0, "distillation_loss": 0.16135096549987793, "epoch": 2.35, "learning_rate": 3.737984846596528e-05, "loss": 0.1543, "step": 2473, "task_loss": 0.09042131900787354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7647454824151829, "compression_loss": 0.0, "distillation_loss": 0.02904283069074154, "epoch": 2.35, "learning_rate": 3.737059025120922e-05, "loss": 0.0267, "step": 2474, "task_loss": 0.005899908021092415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7647632518289538, "compression_loss": 0.0, "distillation_loss": 0.11024047434329987, "epoch": 2.35, "learning_rate": 3.7361329789275855e-05, "loss": 0.1193, "step": 2475, "task_loss": 0.20116351544857025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7647810152708161, "compression_loss": 0.0, "distillation_loss": 0.04543491452932358, "epoch": 2.35, "learning_rate": 3.7352067081847405e-05, "loss": 0.042, "step": 2476, "task_loss": 0.010787051171064377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7647987727417737, "compression_loss": 0.0, "distillation_loss": 0.09550483524799347, "epoch": 2.35, "learning_rate": 3.734280213060649e-05, "loss": 0.1008, "step": 2477, "task_loss": 0.1483430564403534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7648165242428301, "compression_loss": 0.0, "distillation_loss": 0.08557014912366867, "epoch": 2.35, "learning_rate": 3.7333534937236105e-05, "loss": 0.0859, "step": 2478, "task_loss": 0.08859314769506454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7648342697749891, "compression_loss": 0.0, "distillation_loss": 0.12534424662590027, "epoch": 2.35, "learning_rate": 3.7324265503419716e-05, "loss": 0.123, "step": 2479, "task_loss": 0.10213040560483932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7648520093392545, "compression_loss": 0.0, "distillation_loss": 0.09746745228767395, "epoch": 2.36, "learning_rate": 3.731499383084114e-05, "loss": 0.1161, "step": 2480, "task_loss": 0.2837420403957367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7648697429366297, "compression_loss": 0.0, "distillation_loss": 0.09911562502384186, "epoch": 2.36, "learning_rate": 3.730571992118462e-05, "loss": 0.1044, "step": 2481, "task_loss": 0.1517096608877182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7648874705681187, "compression_loss": 0.0, "distillation_loss": 0.04106239229440689, "epoch": 2.36, "learning_rate": 3.7296443776134814e-05, "loss": 0.046, "step": 2482, "task_loss": 0.0909072607755661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7649051922347249, "compression_loss": 0.0, "distillation_loss": 0.16157306730747223, "epoch": 2.36, "learning_rate": 3.7287165397376775e-05, "loss": 0.17, "step": 2483, "task_loss": 0.2461170256137848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7649229079374521, "compression_loss": 0.0, "distillation_loss": 0.034094762057065964, "epoch": 2.36, "learning_rate": 3.727788478659597e-05, "loss": 0.0376, "step": 2484, "task_loss": 0.06928237527608871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7649406176773041, "compression_loss": 0.0, "distillation_loss": 0.06985623389482498, "epoch": 2.36, "learning_rate": 3.726860194547826e-05, "loss": 0.0685, "step": 2485, "task_loss": 0.05666497349739075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7649583214552844, "compression_loss": 0.0, "distillation_loss": 0.07079368084669113, "epoch": 2.36, "learning_rate": 3.725931687570992e-05, "loss": 0.0717, "step": 2486, "task_loss": 0.07995907217264175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7649760192723968, "compression_loss": 0.0, "distillation_loss": 0.056753143668174744, "epoch": 2.36, "learning_rate": 3.7250029578977625e-05, "loss": 0.056, "step": 2487, "task_loss": 0.04943781718611717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7649937111296448, "compression_loss": 0.0, "distillation_loss": 0.08018404990434647, "epoch": 2.36, "learning_rate": 3.724074005696847e-05, "loss": 0.0841, "step": 2488, "task_loss": 0.11959824711084366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7650113970280323, "compression_loss": 0.0, "distillation_loss": 0.17123107612133026, "epoch": 2.36, "learning_rate": 3.723144831136992e-05, "loss": 0.1641, "step": 2489, "task_loss": 0.10034439712762833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7650290769685629, "compression_loss": 0.0, "distillation_loss": 0.06683574616909027, "epoch": 2.36, "learning_rate": 3.722215434386988e-05, "loss": 0.0654, "step": 2490, "task_loss": 0.05212263762950897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7650467509522403, "compression_loss": 0.0, "distillation_loss": 0.015314219519495964, "epoch": 2.37, "learning_rate": 3.721285815615665e-05, "loss": 0.0142, "step": 2491, "task_loss": 0.003950970247387886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.765064418980068, "compression_loss": 0.0, "distillation_loss": 0.04659897834062576, "epoch": 2.37, "learning_rate": 3.7203559749918904e-05, "loss": 0.0624, "step": 2492, "task_loss": 0.20480214059352875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7650820810530501, "compression_loss": 0.0, "distillation_loss": 0.12938767671585083, "epoch": 2.37, "learning_rate": 3.7194259126845764e-05, "loss": 0.1231, "step": 2493, "task_loss": 0.0663764700293541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7650997371721899, "compression_loss": 0.0, "distillation_loss": 0.07641440629959106, "epoch": 2.37, "learning_rate": 3.7184956288626724e-05, "loss": 0.0879, "step": 2494, "task_loss": 0.1917191445827484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7651173873384912, "compression_loss": 0.0, "distillation_loss": 0.08198924362659454, "epoch": 2.37, "learning_rate": 3.71756512369517e-05, "loss": 0.0815, "step": 2495, "task_loss": 0.077280193567276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7651350315529577, "compression_loss": 0.0, "distillation_loss": 0.15662863850593567, "epoch": 2.37, "learning_rate": 3.716634397351097e-05, "loss": 0.1554, "step": 2496, "task_loss": 0.14391350746154785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.765152669816593, "compression_loss": 0.0, "distillation_loss": 0.0537070631980896, "epoch": 2.37, "learning_rate": 3.715703449999528e-05, "loss": 0.0495, "step": 2497, "task_loss": 0.011994145810604095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7651703021304009, "compression_loss": 0.0, "distillation_loss": 0.20824488997459412, "epoch": 2.37, "learning_rate": 3.7147722818095724e-05, "loss": 0.2049, "step": 2498, "task_loss": 0.17516371607780457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7651879284953851, "compression_loss": 0.0, "distillation_loss": 0.0815531462430954, "epoch": 2.37, "learning_rate": 3.713840892950381e-05, "loss": 0.089, "step": 2499, "task_loss": 0.15595076978206635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7652055489125491, "compression_loss": 0.0, "distillation_loss": 0.04428771510720253, "epoch": 2.37, "learning_rate": 3.712909283591145e-05, "loss": 0.0555, "step": 2500, "task_loss": 0.15677018463611603 }, { "epoch": 2.37, "eval_accuracy": 0.8864678899082569, "eval_loss": 0.4365707039833069, "eval_runtime": 18.2037, "eval_samples_per_second": 47.902, "eval_steps_per_second": 5.988, "step": 2500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7652231633828968, "compression_loss": 0.0, "distillation_loss": 0.03173205256462097, "epoch": 2.38, "learning_rate": 3.7119774539010967e-05, "loss": 0.0396, "step": 2501, "task_loss": 0.10992399603128433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7652407719074318, "compression_loss": 0.0, "distillation_loss": 0.03812164068222046, "epoch": 2.38, "learning_rate": 3.711045404049507e-05, "loss": 0.0511, "step": 2502, "task_loss": 0.16801124811172485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7652583744871576, "compression_loss": 0.0, "distillation_loss": 0.054551735520362854, "epoch": 2.38, "learning_rate": 3.710113134205687e-05, "loss": 0.0503, "step": 2503, "task_loss": 0.011674726381897926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7652759711230782, "compression_loss": 0.0, "distillation_loss": 0.057191766798496246, "epoch": 2.38, "learning_rate": 3.709180644538988e-05, "loss": 0.0556, "step": 2504, "task_loss": 0.04153808206319809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7652935618161971, "compression_loss": 0.0, "distillation_loss": 0.07190201431512833, "epoch": 2.38, "learning_rate": 3.708247935218802e-05, "loss": 0.0705, "step": 2505, "task_loss": 0.05747076869010925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.765311146567518, "compression_loss": 0.0, "distillation_loss": 0.0554814338684082, "epoch": 2.38, "learning_rate": 3.707315006414559e-05, "loss": 0.0563, "step": 2506, "task_loss": 0.06399840116500854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7653287253780445, "compression_loss": 0.0, "distillation_loss": 0.14144667983055115, "epoch": 2.38, "learning_rate": 3.706381858295731e-05, "loss": 0.1652, "step": 2507, "task_loss": 0.37935495376586914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7653462982487805, "compression_loss": 0.0, "distillation_loss": 0.19897310435771942, "epoch": 2.38, "learning_rate": 3.705448491031829e-05, "loss": 0.2037, "step": 2508, "task_loss": 0.24580544233322144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7653638651807295, "compression_loss": 0.0, "distillation_loss": 0.03850369527935982, "epoch": 2.38, "learning_rate": 3.7045149047924016e-05, "loss": 0.0414, "step": 2509, "task_loss": 0.06746485829353333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7653814261748952, "compression_loss": 0.0, "distillation_loss": 0.01753471978008747, "epoch": 2.38, "learning_rate": 3.703581099747041e-05, "loss": 0.0161, "step": 2510, "task_loss": 0.002795502543449402 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7653989812322815, "compression_loss": 0.0, "distillation_loss": 0.1509072184562683, "epoch": 2.38, "learning_rate": 3.702647076065378e-05, "loss": 0.1567, "step": 2511, "task_loss": 0.2087806612253189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7654165303538917, "compression_loss": 0.0, "distillation_loss": 0.19133007526397705, "epoch": 2.39, "learning_rate": 3.701712833917082e-05, "loss": 0.1858, "step": 2512, "task_loss": 0.13650889694690704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7654340735407298, "compression_loss": 0.0, "distillation_loss": 0.11685886234045029, "epoch": 2.39, "learning_rate": 3.700778373471861e-05, "loss": 0.1192, "step": 2513, "task_loss": 0.1399974673986435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7654516107937992, "compression_loss": 0.0, "distillation_loss": 0.04263558238744736, "epoch": 2.39, "learning_rate": 3.699843694899467e-05, "loss": 0.0548, "step": 2514, "task_loss": 0.16461949050426483 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7654691421141039, "compression_loss": 0.0, "distillation_loss": 0.13114984333515167, "epoch": 2.39, "learning_rate": 3.698908798369686e-05, "loss": 0.1309, "step": 2515, "task_loss": 0.12871742248535156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7654866675026474, "compression_loss": 0.0, "distillation_loss": 0.08641094714403152, "epoch": 2.39, "learning_rate": 3.697973684052347e-05, "loss": 0.0799, "step": 2516, "task_loss": 0.021780574694275856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7655041869604334, "compression_loss": 0.0, "distillation_loss": 0.05185171589255333, "epoch": 2.39, "learning_rate": 3.697038352117321e-05, "loss": 0.055, "step": 2517, "task_loss": 0.08340021222829819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7655217004884656, "compression_loss": 0.0, "distillation_loss": 0.0397905595600605, "epoch": 2.39, "learning_rate": 3.6961028027345114e-05, "loss": 0.057, "step": 2518, "task_loss": 0.21199220418930054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7655392080877477, "compression_loss": 0.0, "distillation_loss": 0.08754666894674301, "epoch": 2.39, "learning_rate": 3.695167036073868e-05, "loss": 0.0948, "step": 2519, "task_loss": 0.1603783816099167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7655567097592834, "compression_loss": 0.0, "distillation_loss": 0.049034878611564636, "epoch": 2.39, "learning_rate": 3.694231052305376e-05, "loss": 0.0531, "step": 2520, "task_loss": 0.09004518389701843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7655742055040762, "compression_loss": 0.0, "distillation_loss": 0.17068904638290405, "epoch": 2.39, "learning_rate": 3.693294851599063e-05, "loss": 0.162, "step": 2521, "task_loss": 0.08347401022911072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76559169532313, "compression_loss": 0.0, "distillation_loss": 0.2087569534778595, "epoch": 2.4, "learning_rate": 3.692358434124992e-05, "loss": 0.2005, "step": 2522, "task_loss": 0.12605254352092743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7656091792174484, "compression_loss": 0.0, "distillation_loss": 0.18309544026851654, "epoch": 2.4, "learning_rate": 3.69142180005327e-05, "loss": 0.1849, "step": 2523, "task_loss": 0.20159180462360382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7656266571880351, "compression_loss": 0.0, "distillation_loss": 0.09189262241125107, "epoch": 2.4, "learning_rate": 3.69048494955404e-05, "loss": 0.0892, "step": 2524, "task_loss": 0.06457659602165222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7656441292358938, "compression_loss": 0.0, "distillation_loss": 0.06329502165317535, "epoch": 2.4, "learning_rate": 3.689547882797485e-05, "loss": 0.0645, "step": 2525, "task_loss": 0.07518291473388672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.765661595362028, "compression_loss": 0.0, "distillation_loss": 0.04618072509765625, "epoch": 2.4, "learning_rate": 3.688610599953828e-05, "loss": 0.0424, "step": 2526, "task_loss": 0.008488666266202927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7656790555674418, "compression_loss": 0.0, "distillation_loss": 0.1281333714723587, "epoch": 2.4, "learning_rate": 3.6876731011933316e-05, "loss": 0.1215, "step": 2527, "task_loss": 0.06217958778142929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7656965098531384, "compression_loss": 0.0, "distillation_loss": 0.10788536071777344, "epoch": 2.4, "learning_rate": 3.686735386686296e-05, "loss": 0.1122, "step": 2528, "task_loss": 0.15063226222991943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7657139582201218, "compression_loss": 0.0, "distillation_loss": 0.11169138550758362, "epoch": 2.4, "learning_rate": 3.685797456603062e-05, "loss": 0.1213, "step": 2529, "task_loss": 0.20762555301189423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7657314006693955, "compression_loss": 0.0, "distillation_loss": 0.2469841092824936, "epoch": 2.4, "learning_rate": 3.684859311114009e-05, "loss": 0.2493, "step": 2530, "task_loss": 0.2696739435195923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7657488372019633, "compression_loss": 0.0, "distillation_loss": 0.08042797446250916, "epoch": 2.4, "learning_rate": 3.6839209503895566e-05, "loss": 0.0757, "step": 2531, "task_loss": 0.033532898873090744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7657662678188288, "compression_loss": 0.0, "distillation_loss": 0.05556613951921463, "epoch": 2.4, "learning_rate": 3.6829823746001616e-05, "loss": 0.0513, "step": 2532, "task_loss": 0.013295382261276245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7657836925209958, "compression_loss": 0.0, "distillation_loss": 0.13678494095802307, "epoch": 2.41, "learning_rate": 3.6820435839163205e-05, "loss": 0.1351, "step": 2533, "task_loss": 0.12005842477083206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7658011113094678, "compression_loss": 0.0, "distillation_loss": 0.07145422697067261, "epoch": 2.41, "learning_rate": 3.68110457850857e-05, "loss": 0.0678, "step": 2534, "task_loss": 0.03514321148395538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7658185241852488, "compression_loss": 0.0, "distillation_loss": 0.1274898201227188, "epoch": 2.41, "learning_rate": 3.680165358547484e-05, "loss": 0.1235, "step": 2535, "task_loss": 0.08788056671619415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7658359311493421, "compression_loss": 0.0, "distillation_loss": 0.17474448680877686, "epoch": 2.41, "learning_rate": 3.6792259242036776e-05, "loss": 0.1785, "step": 2536, "task_loss": 0.21268706023693085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7658533322027515, "compression_loss": 0.0, "distillation_loss": 0.11320266127586365, "epoch": 2.41, "learning_rate": 3.678286275647802e-05, "loss": 0.1103, "step": 2537, "task_loss": 0.08437247574329376 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7658707273464809, "compression_loss": 0.0, "distillation_loss": 0.15101607143878937, "epoch": 2.41, "learning_rate": 3.677346413050551e-05, "loss": 0.15, "step": 2538, "task_loss": 0.14134559035301208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7658881165815338, "compression_loss": 0.0, "distillation_loss": 0.10815154016017914, "epoch": 2.41, "learning_rate": 3.6764063365826525e-05, "loss": 0.1181, "step": 2539, "task_loss": 0.2076764553785324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7659054999089138, "compression_loss": 0.0, "distillation_loss": 0.10024487972259521, "epoch": 2.41, "learning_rate": 3.675466046414878e-05, "loss": 0.1026, "step": 2540, "task_loss": 0.12348996847867966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7659228773296247, "compression_loss": 0.0, "distillation_loss": 0.033068105578422546, "epoch": 2.41, "learning_rate": 3.674525542718035e-05, "loss": 0.0304, "step": 2541, "task_loss": 0.006272824481129646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7659402488446703, "compression_loss": 0.0, "distillation_loss": 0.05263807624578476, "epoch": 2.41, "learning_rate": 3.6735848256629705e-05, "loss": 0.0482, "step": 2542, "task_loss": 0.008157419040799141 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.765957614455054, "compression_loss": 0.0, "distillation_loss": 0.06785233318805695, "epoch": 2.42, "learning_rate": 3.6726438954205714e-05, "loss": 0.0744, "step": 2543, "task_loss": 0.13311488926410675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7659749741617796, "compression_loss": 0.0, "distillation_loss": 0.034298889338970184, "epoch": 2.42, "learning_rate": 3.6717027521617595e-05, "loss": 0.0315, "step": 2544, "task_loss": 0.006205489858984947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7659923279658509, "compression_loss": 0.0, "distillation_loss": 0.19808626174926758, "epoch": 2.42, "learning_rate": 3.6707613960575006e-05, "loss": 0.1921, "step": 2545, "task_loss": 0.13829779624938965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7660096758682715, "compression_loss": 0.0, "distillation_loss": 0.11868824064731598, "epoch": 2.42, "learning_rate": 3.669819827278795e-05, "loss": 0.118, "step": 2546, "task_loss": 0.11179932951927185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766027017870045, "compression_loss": 0.0, "distillation_loss": 0.018186789005994797, "epoch": 2.42, "learning_rate": 3.668878045996685e-05, "loss": 0.0167, "step": 2547, "task_loss": 0.0034119393676519394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7660443539721753, "compression_loss": 0.0, "distillation_loss": 0.05021512135863304, "epoch": 2.42, "learning_rate": 3.667936052382248e-05, "loss": 0.0501, "step": 2548, "task_loss": 0.04938085749745369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7660616841756658, "compression_loss": 0.0, "distillation_loss": 0.1335231363773346, "epoch": 2.42, "learning_rate": 3.666993846606602e-05, "loss": 0.1321, "step": 2549, "task_loss": 0.11888933926820755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7660790084815203, "compression_loss": 0.0, "distillation_loss": 0.07150552421808243, "epoch": 2.42, "learning_rate": 3.666051428840904e-05, "loss": 0.0767, "step": 2550, "task_loss": 0.12331639230251312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7660963268907426, "compression_loss": 0.0, "distillation_loss": 0.3968278169631958, "epoch": 2.42, "learning_rate": 3.665108799256348e-05, "loss": 0.4067, "step": 2551, "task_loss": 0.4955511689186096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7661136394043363, "compression_loss": 0.0, "distillation_loss": 0.2079869657754898, "epoch": 2.42, "learning_rate": 3.6641659580241665e-05, "loss": 0.2161, "step": 2552, "task_loss": 0.2889349162578583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766130946023305, "compression_loss": 0.0, "distillation_loss": 0.21949072182178497, "epoch": 2.42, "learning_rate": 3.663222905315633e-05, "loss": 0.2231, "step": 2553, "task_loss": 0.25531524419784546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7661482467486525, "compression_loss": 0.0, "distillation_loss": 0.1400509923696518, "epoch": 2.43, "learning_rate": 3.662279641302056e-05, "loss": 0.1469, "step": 2554, "task_loss": 0.2087712436914444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7661655415813824, "compression_loss": 0.0, "distillation_loss": 0.029690932482481003, "epoch": 2.43, "learning_rate": 3.6613361661547854e-05, "loss": 0.0333, "step": 2555, "task_loss": 0.06537258625030518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7661828305224985, "compression_loss": 0.0, "distillation_loss": 0.08491934835910797, "epoch": 2.43, "learning_rate": 3.660392480045206e-05, "loss": 0.0799, "step": 2556, "task_loss": 0.03468838334083557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7662001135730042, "compression_loss": 0.0, "distillation_loss": 0.06277995556592941, "epoch": 2.43, "learning_rate": 3.659448583144745e-05, "loss": 0.06, "step": 2557, "task_loss": 0.03468390554189682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7662173907339035, "compression_loss": 0.0, "distillation_loss": 0.14954762160778046, "epoch": 2.43, "learning_rate": 3.658504475624865e-05, "loss": 0.1508, "step": 2558, "task_loss": 0.16203594207763672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7662346620061999, "compression_loss": 0.0, "distillation_loss": 0.08595126867294312, "epoch": 2.43, "learning_rate": 3.657560157657067e-05, "loss": 0.0885, "step": 2559, "task_loss": 0.11119415611028671 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7662519273908973, "compression_loss": 0.0, "distillation_loss": 0.25560837984085083, "epoch": 2.43, "learning_rate": 3.656615629412892e-05, "loss": 0.2518, "step": 2560, "task_loss": 0.21740317344665527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766269186888999, "compression_loss": 0.0, "distillation_loss": 0.03523785620927811, "epoch": 2.43, "learning_rate": 3.655670891063917e-05, "loss": 0.0325, "step": 2561, "task_loss": 0.007899057120084763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7662864405015091, "compression_loss": 0.0, "distillation_loss": 0.07272191345691681, "epoch": 2.43, "learning_rate": 3.6547259427817595e-05, "loss": 0.068, "step": 2562, "task_loss": 0.025067364796996117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7663036882294311, "compression_loss": 0.0, "distillation_loss": 0.06609214842319489, "epoch": 2.43, "learning_rate": 3.6537807847380726e-05, "loss": 0.0712, "step": 2563, "task_loss": 0.11691683530807495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7663209300737686, "compression_loss": 0.0, "distillation_loss": 0.06222629174590111, "epoch": 2.43, "learning_rate": 3.65283541710455e-05, "loss": 0.0747, "step": 2564, "task_loss": 0.18653015792369843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7663381660355253, "compression_loss": 0.0, "distillation_loss": 0.08360789716243744, "epoch": 2.44, "learning_rate": 3.6518898400529214e-05, "loss": 0.0781, "step": 2565, "task_loss": 0.02866414003074169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7663553961157049, "compression_loss": 0.0, "distillation_loss": 0.044771708548069, "epoch": 2.44, "learning_rate": 3.650944053754956e-05, "loss": 0.0451, "step": 2566, "task_loss": 0.04824347794055939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7663726203153112, "compression_loss": 0.0, "distillation_loss": 0.09424047917127609, "epoch": 2.44, "learning_rate": 3.6499980583824606e-05, "loss": 0.0961, "step": 2567, "task_loss": 0.1124841496348381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7663898386353478, "compression_loss": 0.0, "distillation_loss": 0.04939156025648117, "epoch": 2.44, "learning_rate": 3.64905185410728e-05, "loss": 0.0606, "step": 2568, "task_loss": 0.16101084649562836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7664070510768184, "compression_loss": 0.0, "distillation_loss": 0.08628442138433456, "epoch": 2.44, "learning_rate": 3.6481054411012946e-05, "loss": 0.087, "step": 2569, "task_loss": 0.09357213228940964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7664242576407266, "compression_loss": 0.0, "distillation_loss": 0.06562324613332748, "epoch": 2.44, "learning_rate": 3.647158819536427e-05, "loss": 0.0648, "step": 2570, "task_loss": 0.057407625019550323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7664414583280762, "compression_loss": 0.0, "distillation_loss": 0.18060490489006042, "epoch": 2.44, "learning_rate": 3.646211989584635e-05, "loss": 0.1716, "step": 2571, "task_loss": 0.09014902263879776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7664586531398708, "compression_loss": 0.0, "distillation_loss": 0.072050541639328, "epoch": 2.44, "learning_rate": 3.645264951417915e-05, "loss": 0.0875, "step": 2572, "task_loss": 0.2262389212846756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7664758420771142, "compression_loss": 0.0, "distillation_loss": 0.1359626054763794, "epoch": 2.44, "learning_rate": 3.644317705208301e-05, "loss": 0.1305, "step": 2573, "task_loss": 0.08181658387184143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7664930251408099, "compression_loss": 0.0, "distillation_loss": 0.049975574016571045, "epoch": 2.44, "learning_rate": 3.643370251127865e-05, "loss": 0.0476, "step": 2574, "task_loss": 0.025839539244771004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7665102023319617, "compression_loss": 0.0, "distillation_loss": 0.13199934363365173, "epoch": 2.45, "learning_rate": 3.6424225893487166e-05, "loss": 0.1329, "step": 2575, "task_loss": 0.1408880352973938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7665273736515732, "compression_loss": 0.0, "distillation_loss": 0.036083199083805084, "epoch": 2.45, "learning_rate": 3.641474720043002e-05, "loss": 0.0437, "step": 2576, "task_loss": 0.11259350180625916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7665445391006481, "compression_loss": 0.0, "distillation_loss": 0.06714160740375519, "epoch": 2.45, "learning_rate": 3.6405266433829075e-05, "loss": 0.0623, "step": 2577, "task_loss": 0.0186399407684803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7665616986801902, "compression_loss": 0.0, "distillation_loss": 0.1377488076686859, "epoch": 2.45, "learning_rate": 3.639578359540655e-05, "loss": 0.1438, "step": 2578, "task_loss": 0.19833146035671234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7665788523912032, "compression_loss": 0.0, "distillation_loss": 0.030588299036026, "epoch": 2.45, "learning_rate": 3.638629868688506e-05, "loss": 0.0324, "step": 2579, "task_loss": 0.04895820468664169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7665960002346905, "compression_loss": 0.0, "distillation_loss": 0.022023940458893776, "epoch": 2.45, "learning_rate": 3.6376811709987574e-05, "loss": 0.0295, "step": 2580, "task_loss": 0.09721846878528595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766613142211656, "compression_loss": 0.0, "distillation_loss": 0.021494712680578232, "epoch": 2.45, "learning_rate": 3.636732266643745e-05, "loss": 0.028, "step": 2581, "task_loss": 0.0862061157822609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7666302783231035, "compression_loss": 0.0, "distillation_loss": 0.019449274986982346, "epoch": 2.45, "learning_rate": 3.635783155795841e-05, "loss": 0.0182, "step": 2582, "task_loss": 0.006459413096308708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7666474085700363, "compression_loss": 0.0, "distillation_loss": 0.04088941961526871, "epoch": 2.45, "learning_rate": 3.634833838627458e-05, "loss": 0.0504, "step": 2583, "task_loss": 0.13594874739646912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7666645329534585, "compression_loss": 0.0, "distillation_loss": 0.23599717020988464, "epoch": 2.45, "learning_rate": 3.6338843153110424e-05, "loss": 0.2423, "step": 2584, "task_loss": 0.29853349924087524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7666816514743735, "compression_loss": 0.0, "distillation_loss": 0.1065201386809349, "epoch": 2.45, "learning_rate": 3.63293458601908e-05, "loss": 0.1064, "step": 2585, "task_loss": 0.10565009713172913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7666987641337851, "compression_loss": 0.0, "distillation_loss": 0.08189831674098969, "epoch": 2.46, "learning_rate": 3.631984650924094e-05, "loss": 0.0766, "step": 2586, "task_loss": 0.029174111783504486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766715870932697, "compression_loss": 0.0, "distillation_loss": 0.07314605265855789, "epoch": 2.46, "learning_rate": 3.631034510198643e-05, "loss": 0.067, "step": 2587, "task_loss": 0.011908493936061859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7667329718721128, "compression_loss": 0.0, "distillation_loss": 0.20526961982250214, "epoch": 2.46, "learning_rate": 3.630084164015328e-05, "loss": 0.2009, "step": 2588, "task_loss": 0.1615629643201828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7667500669530363, "compression_loss": 0.0, "distillation_loss": 0.02173478901386261, "epoch": 2.46, "learning_rate": 3.6291336125467814e-05, "loss": 0.0284, "step": 2589, "task_loss": 0.08851068466901779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766767156176471, "compression_loss": 0.0, "distillation_loss": 0.0929790809750557, "epoch": 2.46, "learning_rate": 3.628182855965676e-05, "loss": 0.0901, "step": 2590, "task_loss": 0.06388040632009506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7667842395434207, "compression_loss": 0.0, "distillation_loss": 0.09808763116598129, "epoch": 2.46, "learning_rate": 3.627231894444721e-05, "loss": 0.093, "step": 2591, "task_loss": 0.04766422510147095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7668013170548891, "compression_loss": 0.0, "distillation_loss": 0.08065304160118103, "epoch": 2.46, "learning_rate": 3.6262807281566634e-05, "loss": 0.0768, "step": 2592, "task_loss": 0.04211016744375229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7668183887118799, "compression_loss": 0.0, "distillation_loss": 0.09210735559463501, "epoch": 2.46, "learning_rate": 3.6253293572742884e-05, "loss": 0.0841, "step": 2593, "task_loss": 0.011922374367713928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7668354545153966, "compression_loss": 0.0, "distillation_loss": 0.20635256171226501, "epoch": 2.46, "learning_rate": 3.624377781970416e-05, "loss": 0.1924, "step": 2594, "task_loss": 0.06673353165388107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7668525144664431, "compression_loss": 0.0, "distillation_loss": 0.1058523878455162, "epoch": 2.46, "learning_rate": 3.6234260024179033e-05, "loss": 0.1146, "step": 2595, "task_loss": 0.1931142807006836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.766869568566023, "compression_loss": 0.0, "distillation_loss": 0.029114918783307076, "epoch": 2.47, "learning_rate": 3.622474018789648e-05, "loss": 0.0281, "step": 2596, "task_loss": 0.018587203696370125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76688661681514, "compression_loss": 0.0, "distillation_loss": 0.025142014026641846, "epoch": 2.47, "learning_rate": 3.62152183125858e-05, "loss": 0.0326, "step": 2597, "task_loss": 0.09944605827331543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7669036592147978, "compression_loss": 0.0, "distillation_loss": 0.19486957788467407, "epoch": 2.47, "learning_rate": 3.620569439997671e-05, "loss": 0.2059, "step": 2598, "task_loss": 0.30563467741012573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7669206957659999, "compression_loss": 0.0, "distillation_loss": 0.03863801434636116, "epoch": 2.47, "learning_rate": 3.6196168451799266e-05, "loss": 0.0356, "step": 2599, "task_loss": 0.008016956970095634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7669377264697502, "compression_loss": 0.0, "distillation_loss": 0.13873320817947388, "epoch": 2.47, "learning_rate": 3.618664046978389e-05, "loss": 0.1375, "step": 2600, "task_loss": 0.12616637349128723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7669547513270523, "compression_loss": 0.0, "distillation_loss": 0.022825509309768677, "epoch": 2.47, "learning_rate": 3.617711045566141e-05, "loss": 0.0215, "step": 2601, "task_loss": 0.00933530181646347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76697177033891, "compression_loss": 0.0, "distillation_loss": 0.06487306207418442, "epoch": 2.47, "learning_rate": 3.616757841116298e-05, "loss": 0.0745, "step": 2602, "task_loss": 0.16160070896148682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7669887835063267, "compression_loss": 0.0, "distillation_loss": 0.0611257366836071, "epoch": 2.47, "learning_rate": 3.6158044338020155e-05, "loss": 0.0651, "step": 2603, "task_loss": 0.1010468602180481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7670057908303063, "compression_loss": 0.0, "distillation_loss": 0.028607673943042755, "epoch": 2.47, "learning_rate": 3.614850823796483e-05, "loss": 0.0267, "step": 2604, "task_loss": 0.009802697226405144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7670227923118524, "compression_loss": 0.0, "distillation_loss": 0.19368760287761688, "epoch": 2.47, "learning_rate": 3.6138970112729296e-05, "loss": 0.1928, "step": 2605, "task_loss": 0.184406578540802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7670397879519689, "compression_loss": 0.0, "distillation_loss": 0.06413474678993225, "epoch": 2.47, "learning_rate": 3.612942996404619e-05, "loss": 0.0689, "step": 2606, "task_loss": 0.11217701435089111 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7670567777516591, "compression_loss": 0.0, "distillation_loss": 0.07466208934783936, "epoch": 2.48, "learning_rate": 3.611988779364853e-05, "loss": 0.0731, "step": 2607, "task_loss": 0.059125810861587524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767073761711927, "compression_loss": 0.0, "distillation_loss": 0.034310828894376755, "epoch": 2.48, "learning_rate": 3.611034360326971e-05, "loss": 0.0405, "step": 2608, "task_loss": 0.09609294682741165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7670907398337762, "compression_loss": 0.0, "distillation_loss": 0.07237622141838074, "epoch": 2.48, "learning_rate": 3.6100797394643455e-05, "loss": 0.0682, "step": 2609, "task_loss": 0.03029775619506836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7671077121182103, "compression_loss": 0.0, "distillation_loss": 0.03261907026171684, "epoch": 2.48, "learning_rate": 3.60912491695039e-05, "loss": 0.0302, "step": 2610, "task_loss": 0.008593011647462845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7671246785662329, "compression_loss": 0.0, "distillation_loss": 0.044936779886484146, "epoch": 2.48, "learning_rate": 3.608169892958551e-05, "loss": 0.0455, "step": 2611, "task_loss": 0.050538793206214905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7671416391788479, "compression_loss": 0.0, "distillation_loss": 0.04782456159591675, "epoch": 2.48, "learning_rate": 3.607214667662314e-05, "loss": 0.0502, "step": 2612, "task_loss": 0.07197590172290802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767158593957059, "compression_loss": 0.0, "distillation_loss": 0.14677798748016357, "epoch": 2.48, "learning_rate": 3.606259241235201e-05, "loss": 0.1464, "step": 2613, "task_loss": 0.1426515430212021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7671755429018696, "compression_loss": 0.0, "distillation_loss": 0.08823909610509872, "epoch": 2.48, "learning_rate": 3.605303613850768e-05, "loss": 0.0938, "step": 2614, "task_loss": 0.14365153014659882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7671924860142837, "compression_loss": 0.0, "distillation_loss": 0.21100696921348572, "epoch": 2.48, "learning_rate": 3.604347785682611e-05, "loss": 0.2037, "step": 2615, "task_loss": 0.13835720717906952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7672094232953047, "compression_loss": 0.0, "distillation_loss": 0.12257960438728333, "epoch": 2.48, "learning_rate": 3.60339175690436e-05, "loss": 0.115, "step": 2616, "task_loss": 0.04694103077054024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7672263547459366, "compression_loss": 0.0, "distillation_loss": 0.18178528547286987, "epoch": 2.49, "learning_rate": 3.602435527689683e-05, "loss": 0.1834, "step": 2617, "task_loss": 0.198032945394516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7672432803671828, "compression_loss": 0.0, "distillation_loss": 0.11362110078334808, "epoch": 2.49, "learning_rate": 3.6014790982122816e-05, "loss": 0.1279, "step": 2618, "task_loss": 0.25671568512916565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767260200160047, "compression_loss": 0.0, "distillation_loss": 0.10963121801614761, "epoch": 2.49, "learning_rate": 3.6005224686458985e-05, "loss": 0.1102, "step": 2619, "task_loss": 0.11555326730012894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7672771141255331, "compression_loss": 0.0, "distillation_loss": 0.02279139682650566, "epoch": 2.49, "learning_rate": 3.599565639164308e-05, "loss": 0.0305, "step": 2620, "task_loss": 0.09947670251131058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7672940222646447, "compression_loss": 0.0, "distillation_loss": 0.07615819573402405, "epoch": 2.49, "learning_rate": 3.5986086099413234e-05, "loss": 0.0782, "step": 2621, "task_loss": 0.09702453017234802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7673109245783853, "compression_loss": 0.0, "distillation_loss": 0.09059733152389526, "epoch": 2.49, "learning_rate": 3.597651381150795e-05, "loss": 0.1036, "step": 2622, "task_loss": 0.22066594660282135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7673278210677588, "compression_loss": 0.0, "distillation_loss": 0.17163929343223572, "epoch": 2.49, "learning_rate": 3.5966939529666056e-05, "loss": 0.1662, "step": 2623, "task_loss": 0.11702217161655426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7673447117337687, "compression_loss": 0.0, "distillation_loss": 0.25577402114868164, "epoch": 2.49, "learning_rate": 3.595736325562679e-05, "loss": 0.2671, "step": 2624, "task_loss": 0.36930835247039795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7673615965774189, "compression_loss": 0.0, "distillation_loss": 0.0822661817073822, "epoch": 2.49, "learning_rate": 3.5947784991129716e-05, "loss": 0.0869, "step": 2625, "task_loss": 0.1290304958820343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7673784755997128, "compression_loss": 0.0, "distillation_loss": 0.059237804263830185, "epoch": 2.49, "learning_rate": 3.593820473791476e-05, "loss": 0.0793, "step": 2626, "task_loss": 0.26018190383911133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7673953488016544, "compression_loss": 0.0, "distillation_loss": 0.31930679082870483, "epoch": 2.49, "learning_rate": 3.5928622497722245e-05, "loss": 0.3108, "step": 2627, "task_loss": 0.2345261424779892 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7674122161842472, "compression_loss": 0.0, "distillation_loss": 0.12323985993862152, "epoch": 2.5, "learning_rate": 3.591903827229282e-05, "loss": 0.1256, "step": 2628, "task_loss": 0.1471230387687683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7674290777484948, "compression_loss": 0.0, "distillation_loss": 0.16051419079303741, "epoch": 2.5, "learning_rate": 3.590945206336751e-05, "loss": 0.1523, "step": 2629, "task_loss": 0.07876568287611008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767445933495401, "compression_loss": 0.0, "distillation_loss": 0.15569360554218292, "epoch": 2.5, "learning_rate": 3.5899863872687675e-05, "loss": 0.1511, "step": 2630, "task_loss": 0.10985402017831802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7674627834259696, "compression_loss": 0.0, "distillation_loss": 0.3402307629585266, "epoch": 2.5, "learning_rate": 3.58902737019951e-05, "loss": 0.3425, "step": 2631, "task_loss": 0.36277827620506287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767479627541204, "compression_loss": 0.0, "distillation_loss": 0.2227693647146225, "epoch": 2.5, "learning_rate": 3.5880681553031835e-05, "loss": 0.2177, "step": 2632, "task_loss": 0.17254310846328735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7674964658421082, "compression_loss": 0.0, "distillation_loss": 0.1516626626253128, "epoch": 2.5, "learning_rate": 3.5871087427540375e-05, "loss": 0.153, "step": 2633, "task_loss": 0.16509924829006195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7675132983296856, "compression_loss": 0.0, "distillation_loss": 0.06550684571266174, "epoch": 2.5, "learning_rate": 3.586149132726353e-05, "loss": 0.0695, "step": 2634, "task_loss": 0.10565401613712311 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7675301250049401, "compression_loss": 0.0, "distillation_loss": 0.08065078407526016, "epoch": 2.5, "learning_rate": 3.585189325394447e-05, "loss": 0.0776, "step": 2635, "task_loss": 0.04991026595234871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7675469458688752, "compression_loss": 0.0, "distillation_loss": 0.10402727127075195, "epoch": 2.5, "learning_rate": 3.5842293209326746e-05, "loss": 0.1008, "step": 2636, "task_loss": 0.07204234600067139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7675637609224947, "compression_loss": 0.0, "distillation_loss": 0.1009630411863327, "epoch": 2.5, "learning_rate": 3.583269119515423e-05, "loss": 0.1036, "step": 2637, "task_loss": 0.1269753873348236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7675805701668021, "compression_loss": 0.0, "distillation_loss": 0.08721812069416046, "epoch": 2.51, "learning_rate": 3.58230872131712e-05, "loss": 0.0916, "step": 2638, "task_loss": 0.13126561045646667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7675973736028014, "compression_loss": 0.0, "distillation_loss": 0.06688186526298523, "epoch": 2.51, "learning_rate": 3.581348126512225e-05, "loss": 0.0659, "step": 2639, "task_loss": 0.0573241263628006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7676141712314961, "compression_loss": 0.0, "distillation_loss": 0.07377175241708755, "epoch": 2.51, "learning_rate": 3.5803873352752343e-05, "loss": 0.0862, "step": 2640, "task_loss": 0.19805538654327393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7676309630538898, "compression_loss": 0.0, "distillation_loss": 0.1930721551179886, "epoch": 2.51, "learning_rate": 3.5794263477806816e-05, "loss": 0.185, "step": 2641, "task_loss": 0.1118684709072113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7676477490709863, "compression_loss": 0.0, "distillation_loss": 0.13114435970783234, "epoch": 2.51, "learning_rate": 3.578465164203134e-05, "loss": 0.1396, "step": 2642, "task_loss": 0.21528376638889313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7676645292837894, "compression_loss": 0.0, "distillation_loss": 0.1114916130900383, "epoch": 2.51, "learning_rate": 3.577503784717195e-05, "loss": 0.1063, "step": 2643, "task_loss": 0.05928418040275574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7676813036933025, "compression_loss": 0.0, "distillation_loss": 0.09665559232234955, "epoch": 2.51, "learning_rate": 3.576542209497505e-05, "loss": 0.0976, "step": 2644, "task_loss": 0.10586561262607574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7676980723005294, "compression_loss": 0.0, "distillation_loss": 0.057753339409828186, "epoch": 2.51, "learning_rate": 3.575580438718738e-05, "loss": 0.0596, "step": 2645, "task_loss": 0.07584469765424728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7677148351064739, "compression_loss": 0.0, "distillation_loss": 0.11892731487751007, "epoch": 2.51, "learning_rate": 3.574618472555604e-05, "loss": 0.1125, "step": 2646, "task_loss": 0.05511043220758438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7677315921121395, "compression_loss": 0.0, "distillation_loss": 0.1064673513174057, "epoch": 2.51, "learning_rate": 3.573656311182848e-05, "loss": 0.1096, "step": 2647, "task_loss": 0.13810373842716217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7677483433185299, "compression_loss": 0.0, "distillation_loss": 0.17461617290973663, "epoch": 2.51, "learning_rate": 3.5726939547752536e-05, "loss": 0.1789, "step": 2648, "task_loss": 0.21704277396202087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767765088726649, "compression_loss": 0.0, "distillation_loss": 0.22404174506664276, "epoch": 2.52, "learning_rate": 3.5717314035076355e-05, "loss": 0.216, "step": 2649, "task_loss": 0.14349466562271118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7677818283375003, "compression_loss": 0.0, "distillation_loss": 0.05500160902738571, "epoch": 2.52, "learning_rate": 3.570768657554847e-05, "loss": 0.0503, "step": 2650, "task_loss": 0.008138328790664673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7677985621520874, "compression_loss": 0.0, "distillation_loss": 0.12913605570793152, "epoch": 2.52, "learning_rate": 3.569805717091775e-05, "loss": 0.1416, "step": 2651, "task_loss": 0.2541487216949463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7678152901714143, "compression_loss": 0.0, "distillation_loss": 0.10944856703281403, "epoch": 2.52, "learning_rate": 3.5688425822933414e-05, "loss": 0.1132, "step": 2652, "task_loss": 0.14650918543338776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7678320123964844, "compression_loss": 0.0, "distillation_loss": 0.031936414539813995, "epoch": 2.52, "learning_rate": 3.5678792533345055e-05, "loss": 0.0292, "step": 2653, "task_loss": 0.004621490836143494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7678487288283014, "compression_loss": 0.0, "distillation_loss": 0.0944894552230835, "epoch": 2.52, "learning_rate": 3.5669157303902604e-05, "loss": 0.0947, "step": 2654, "task_loss": 0.09676090627908707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767865439467869, "compression_loss": 0.0, "distillation_loss": 0.03909418731927872, "epoch": 2.52, "learning_rate": 3.565952013635635e-05, "loss": 0.0408, "step": 2655, "task_loss": 0.05638463795185089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767882144316191, "compression_loss": 0.0, "distillation_loss": 0.06495656073093414, "epoch": 2.52, "learning_rate": 3.564988103245692e-05, "loss": 0.0731, "step": 2656, "task_loss": 0.14613646268844604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.767898843374271, "compression_loss": 0.0, "distillation_loss": 0.09904111921787262, "epoch": 2.52, "learning_rate": 3.564023999395531e-05, "loss": 0.1154, "step": 2657, "task_loss": 0.26308944821357727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7679155366431126, "compression_loss": 0.0, "distillation_loss": 0.13033923506736755, "epoch": 2.52, "learning_rate": 3.563059702260287e-05, "loss": 0.1402, "step": 2658, "task_loss": 0.22941374778747559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7679322241237196, "compression_loss": 0.0, "distillation_loss": 0.038218479603528976, "epoch": 2.53, "learning_rate": 3.562095212015128e-05, "loss": 0.0439, "step": 2659, "task_loss": 0.09546901285648346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7679489058170957, "compression_loss": 0.0, "distillation_loss": 0.06592346727848053, "epoch": 2.53, "learning_rate": 3.5611305288352576e-05, "loss": 0.0639, "step": 2660, "task_loss": 0.04572898894548416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7679655817242446, "compression_loss": 0.0, "distillation_loss": 0.1503419280052185, "epoch": 2.53, "learning_rate": 3.560165652895917e-05, "loss": 0.1453, "step": 2661, "task_loss": 0.09945236146450043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7679822518461699, "compression_loss": 0.0, "distillation_loss": 0.03605799376964569, "epoch": 2.53, "learning_rate": 3.5592005843723794e-05, "loss": 0.0407, "step": 2662, "task_loss": 0.08254842460155487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7679989161838752, "compression_loss": 0.0, "distillation_loss": 0.06412842869758606, "epoch": 2.53, "learning_rate": 3.558235323439955e-05, "loss": 0.0721, "step": 2663, "task_loss": 0.14363062381744385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7680155747383642, "compression_loss": 0.0, "distillation_loss": 0.045804090797901154, "epoch": 2.53, "learning_rate": 3.557269870273987e-05, "loss": 0.0485, "step": 2664, "task_loss": 0.07302069664001465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7680322275106408, "compression_loss": 0.0, "distillation_loss": 0.1284482777118683, "epoch": 2.53, "learning_rate": 3.5563042250498556e-05, "loss": 0.131, "step": 2665, "task_loss": 0.1538638472557068 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7680488745017086, "compression_loss": 0.0, "distillation_loss": 0.037929221987724304, "epoch": 2.53, "learning_rate": 3.555338387942974e-05, "loss": 0.0348, "step": 2666, "task_loss": 0.0066341981291770935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7680655157125711, "compression_loss": 0.0, "distillation_loss": 0.04109591618180275, "epoch": 2.53, "learning_rate": 3.5543723591287916e-05, "loss": 0.0568, "step": 2667, "task_loss": 0.19810107350349426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7680821511442322, "compression_loss": 0.0, "distillation_loss": 0.03841260448098183, "epoch": 2.53, "learning_rate": 3.5534061387827936e-05, "loss": 0.0355, "step": 2668, "task_loss": 0.009714646264910698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7680987807976954, "compression_loss": 0.0, "distillation_loss": 0.11870106309652328, "epoch": 2.53, "learning_rate": 3.552439727080495e-05, "loss": 0.1244, "step": 2669, "task_loss": 0.1760600507259369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7681154046739646, "compression_loss": 0.0, "distillation_loss": 0.10362593829631805, "epoch": 2.54, "learning_rate": 3.5514731241974544e-05, "loss": 0.1104, "step": 2670, "task_loss": 0.171173095703125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7681320227740432, "compression_loss": 0.0, "distillation_loss": 0.14736101031303406, "epoch": 2.54, "learning_rate": 3.5505063303092545e-05, "loss": 0.142, "step": 2671, "task_loss": 0.09334026277065277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7681486350989352, "compression_loss": 0.0, "distillation_loss": 0.16119350492954254, "epoch": 2.54, "learning_rate": 3.549539345591521e-05, "loss": 0.1542, "step": 2672, "task_loss": 0.09096268564462662 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.768165241649644, "compression_loss": 0.0, "distillation_loss": 0.22717413306236267, "epoch": 2.54, "learning_rate": 3.5485721702199104e-05, "loss": 0.2191, "step": 2673, "task_loss": 0.1469288021326065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7681818424271735, "compression_loss": 0.0, "distillation_loss": 0.04752913862466812, "epoch": 2.54, "learning_rate": 3.547604804370116e-05, "loss": 0.0435, "step": 2674, "task_loss": 0.007610570639371872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7681984374325271, "compression_loss": 0.0, "distillation_loss": 0.1687610149383545, "epoch": 2.54, "learning_rate": 3.5466372482178635e-05, "loss": 0.1833, "step": 2675, "task_loss": 0.31412065029144287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7682150266667088, "compression_loss": 0.0, "distillation_loss": 0.1348022222518921, "epoch": 2.54, "learning_rate": 3.545669501938913e-05, "loss": 0.1269, "step": 2676, "task_loss": 0.056100159883499146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7682316101307222, "compression_loss": 0.0, "distillation_loss": 0.20402267575263977, "epoch": 2.54, "learning_rate": 3.544701565709063e-05, "loss": 0.2063, "step": 2677, "task_loss": 0.22638899087905884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7682481878255708, "compression_loss": 0.0, "distillation_loss": 0.14998939633369446, "epoch": 2.54, "learning_rate": 3.54373343970414e-05, "loss": 0.143, "step": 2678, "task_loss": 0.08004312962293625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7682647597522585, "compression_loss": 0.0, "distillation_loss": 0.07659981399774551, "epoch": 2.54, "learning_rate": 3.542765124100014e-05, "loss": 0.0728, "step": 2679, "task_loss": 0.03887277841567993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7682813259117889, "compression_loss": 0.0, "distillation_loss": 0.06985659897327423, "epoch": 2.55, "learning_rate": 3.541796619072579e-05, "loss": 0.0637, "step": 2680, "task_loss": 0.008264170959591866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7682978863051656, "compression_loss": 0.0, "distillation_loss": 0.14076358079910278, "epoch": 2.55, "learning_rate": 3.540827924797772e-05, "loss": 0.143, "step": 2681, "task_loss": 0.16294986009597778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7683144409333924, "compression_loss": 0.0, "distillation_loss": 0.05743217468261719, "epoch": 2.55, "learning_rate": 3.5398590414515586e-05, "loss": 0.0619, "step": 2682, "task_loss": 0.10237649083137512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7683309897974729, "compression_loss": 0.0, "distillation_loss": 0.0506620779633522, "epoch": 2.55, "learning_rate": 3.5388899692099433e-05, "loss": 0.0486, "step": 2683, "task_loss": 0.03040938824415207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7683475328984108, "compression_loss": 0.0, "distillation_loss": 0.042747851461172104, "epoch": 2.55, "learning_rate": 3.537920708248961e-05, "loss": 0.062, "step": 2684, "task_loss": 0.23519286513328552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76836407023721, "compression_loss": 0.0, "distillation_loss": 0.13504308462142944, "epoch": 2.55, "learning_rate": 3.536951258744684e-05, "loss": 0.1385, "step": 2685, "task_loss": 0.16998833417892456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7683806018148738, "compression_loss": 0.0, "distillation_loss": 0.16051191091537476, "epoch": 2.55, "learning_rate": 3.5359816208732164e-05, "loss": 0.1578, "step": 2686, "task_loss": 0.13356226682662964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7683971276324061, "compression_loss": 0.0, "distillation_loss": 0.08552385866641998, "epoch": 2.55, "learning_rate": 3.535011794810698e-05, "loss": 0.0798, "step": 2687, "task_loss": 0.027825014665722847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7684136476908106, "compression_loss": 0.0, "distillation_loss": 0.04348837211728096, "epoch": 2.55, "learning_rate": 3.5340417807333026e-05, "loss": 0.0489, "step": 2688, "task_loss": 0.0971134603023529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7684301619910909, "compression_loss": 0.0, "distillation_loss": 0.03343481570482254, "epoch": 2.55, "learning_rate": 3.533071578817239e-05, "loss": 0.0309, "step": 2689, "task_loss": 0.008328597992658615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7684466705342508, "compression_loss": 0.0, "distillation_loss": 0.09740258753299713, "epoch": 2.55, "learning_rate": 3.5321011892387455e-05, "loss": 0.0948, "step": 2690, "task_loss": 0.07093527913093567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7684631733212938, "compression_loss": 0.0, "distillation_loss": 0.048360321670770645, "epoch": 2.56, "learning_rate": 3.5311306121741015e-05, "loss": 0.0522, "step": 2691, "task_loss": 0.08722103387117386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7684796703532236, "compression_loss": 0.0, "distillation_loss": 0.021398166194558144, "epoch": 2.56, "learning_rate": 3.530159847799616e-05, "loss": 0.0198, "step": 2692, "task_loss": 0.005231667309999466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7684961616310441, "compression_loss": 0.0, "distillation_loss": 0.2711659371852875, "epoch": 2.56, "learning_rate": 3.529188896291632e-05, "loss": 0.257, "step": 2693, "task_loss": 0.1298050582408905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7685126471557587, "compression_loss": 0.0, "distillation_loss": 0.07550038397312164, "epoch": 2.56, "learning_rate": 3.5282177578265296e-05, "loss": 0.0708, "step": 2694, "task_loss": 0.028521571308374405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7685291269283714, "compression_loss": 0.0, "distillation_loss": 0.17867624759674072, "epoch": 2.56, "learning_rate": 3.527246432580718e-05, "loss": 0.1926, "step": 2695, "task_loss": 0.31801068782806396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7685456009498856, "compression_loss": 0.0, "distillation_loss": 0.1781276911497116, "epoch": 2.56, "learning_rate": 3.526274920730645e-05, "loss": 0.1767, "step": 2696, "task_loss": 0.16398294270038605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7685620692213051, "compression_loss": 0.0, "distillation_loss": 0.0298934206366539, "epoch": 2.56, "learning_rate": 3.525303222452791e-05, "loss": 0.0383, "step": 2697, "task_loss": 0.11393886804580688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7685785317436336, "compression_loss": 0.0, "distillation_loss": 0.03145752474665642, "epoch": 2.56, "learning_rate": 3.5243313379236685e-05, "loss": 0.0375, "step": 2698, "task_loss": 0.09145695716142654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7685949885178748, "compression_loss": 0.0, "distillation_loss": 0.3614198565483093, "epoch": 2.56, "learning_rate": 3.5233592673198245e-05, "loss": 0.3515, "step": 2699, "task_loss": 0.26197120547294617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7686114395450322, "compression_loss": 0.0, "distillation_loss": 0.06908755749464035, "epoch": 2.56, "learning_rate": 3.522387010817842e-05, "loss": 0.0775, "step": 2700, "task_loss": 0.15282829105854034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7686278848261097, "compression_loss": 0.0, "distillation_loss": 0.07420150190591812, "epoch": 2.57, "learning_rate": 3.521414568594335e-05, "loss": 0.0918, "step": 2701, "task_loss": 0.25066766142845154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7686443243621108, "compression_loss": 0.0, "distillation_loss": 0.2168622463941574, "epoch": 2.57, "learning_rate": 3.520441940825952e-05, "loss": 0.2086, "step": 2702, "task_loss": 0.1346578747034073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7686607581540394, "compression_loss": 0.0, "distillation_loss": 0.2985137701034546, "epoch": 2.57, "learning_rate": 3.5194691276893755e-05, "loss": 0.2939, "step": 2703, "task_loss": 0.2523634433746338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.768677186202899, "compression_loss": 0.0, "distillation_loss": 0.06703074276447296, "epoch": 2.57, "learning_rate": 3.518496129361323e-05, "loss": 0.0638, "step": 2704, "task_loss": 0.03458258882164955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7686936085096935, "compression_loss": 0.0, "distillation_loss": 0.044759538024663925, "epoch": 2.57, "learning_rate": 3.5175229460185425e-05, "loss": 0.0411, "step": 2705, "task_loss": 0.007678527384996414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7687100250754263, "compression_loss": 0.0, "distillation_loss": 0.03616587817668915, "epoch": 2.57, "learning_rate": 3.5165495778378196e-05, "loss": 0.0336, "step": 2706, "task_loss": 0.010267160832881927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7687264359011012, "compression_loss": 0.0, "distillation_loss": 0.22257006168365479, "epoch": 2.57, "learning_rate": 3.51557602499597e-05, "loss": 0.2122, "step": 2707, "task_loss": 0.1191282868385315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7687428409877219, "compression_loss": 0.0, "distillation_loss": 0.16167038679122925, "epoch": 2.57, "learning_rate": 3.514602287669844e-05, "loss": 0.1738, "step": 2708, "task_loss": 0.2829251289367676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7687592403362921, "compression_loss": 0.0, "distillation_loss": 0.020230792462825775, "epoch": 2.57, "learning_rate": 3.5136283660363255e-05, "loss": 0.0191, "step": 2709, "task_loss": 0.008950954303145409 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7687756339478155, "compression_loss": 0.0, "distillation_loss": 0.10566231608390808, "epoch": 2.57, "learning_rate": 3.5126542602723334e-05, "loss": 0.1088, "step": 2710, "task_loss": 0.13719259202480316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7687920218232956, "compression_loss": 0.0, "distillation_loss": 0.16966648399829865, "epoch": 2.57, "learning_rate": 3.5116799705548175e-05, "loss": 0.1624, "step": 2711, "task_loss": 0.09719309955835342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7688084039637363, "compression_loss": 0.0, "distillation_loss": 0.050816163420677185, "epoch": 2.58, "learning_rate": 3.510705497060762e-05, "loss": 0.0547, "step": 2712, "task_loss": 0.08981159329414368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7688247803701412, "compression_loss": 0.0, "distillation_loss": 0.025936776772141457, "epoch": 2.58, "learning_rate": 3.509730839967187e-05, "loss": 0.0244, "step": 2713, "task_loss": 0.010662872344255447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.768841151043514, "compression_loss": 0.0, "distillation_loss": 0.07976531982421875, "epoch": 2.58, "learning_rate": 3.508755999451141e-05, "loss": 0.0739, "step": 2714, "task_loss": 0.020689811557531357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7688575159848584, "compression_loss": 0.0, "distillation_loss": 0.037483714520931244, "epoch": 2.58, "learning_rate": 3.50778097568971e-05, "loss": 0.04, "step": 2715, "task_loss": 0.06217388063669205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.768873875195178, "compression_loss": 0.0, "distillation_loss": 0.13560466468334198, "epoch": 2.58, "learning_rate": 3.506805768860011e-05, "loss": 0.1373, "step": 2716, "task_loss": 0.1521109640598297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7688902286754765, "compression_loss": 0.0, "distillation_loss": 0.0700618177652359, "epoch": 2.58, "learning_rate": 3.505830379139195e-05, "loss": 0.0776, "step": 2717, "task_loss": 0.1449713408946991 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7689065764267576, "compression_loss": 0.0, "distillation_loss": 0.1059018149971962, "epoch": 2.58, "learning_rate": 3.504854806704446e-05, "loss": 0.1082, "step": 2718, "task_loss": 0.12906883656978607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.768922918450025, "compression_loss": 0.0, "distillation_loss": 0.06906574964523315, "epoch": 2.58, "learning_rate": 3.503879051732983e-05, "loss": 0.0792, "step": 2719, "task_loss": 0.16997401416301727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7689392547462826, "compression_loss": 0.0, "distillation_loss": 0.03843502700328827, "epoch": 2.58, "learning_rate": 3.502903114402055e-05, "loss": 0.0541, "step": 2720, "task_loss": 0.1947551667690277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7689555853165336, "compression_loss": 0.0, "distillation_loss": 0.07670299708843231, "epoch": 2.58, "learning_rate": 3.501926994888946e-05, "loss": 0.071, "step": 2721, "task_loss": 0.019545117393136024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.768971910161782, "compression_loss": 0.0, "distillation_loss": 0.2852790057659149, "epoch": 2.58, "learning_rate": 3.500950693370974e-05, "loss": 0.2769, "step": 2722, "task_loss": 0.20112605392932892 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7689882292830315, "compression_loss": 0.0, "distillation_loss": 0.06108977273106575, "epoch": 2.59, "learning_rate": 3.499974210025487e-05, "loss": 0.0675, "step": 2723, "task_loss": 0.12486658245325089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7690045426812856, "compression_loss": 0.0, "distillation_loss": 0.07272262871265411, "epoch": 2.59, "learning_rate": 3.4989975450298694e-05, "loss": 0.0792, "step": 2724, "task_loss": 0.13739514350891113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7690208503575482, "compression_loss": 0.0, "distillation_loss": 0.13614577054977417, "epoch": 2.59, "learning_rate": 3.498020698561536e-05, "loss": 0.1551, "step": 2725, "task_loss": 0.32593345642089844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7690371523128228, "compression_loss": 0.0, "distillation_loss": 0.0375310480594635, "epoch": 2.59, "learning_rate": 3.497043670797936e-05, "loss": 0.0366, "step": 2726, "task_loss": 0.028382275253534317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7690534485481131, "compression_loss": 0.0, "distillation_loss": 0.04977725073695183, "epoch": 2.59, "learning_rate": 3.496066461916552e-05, "loss": 0.0467, "step": 2727, "task_loss": 0.019020110368728638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7690697390644229, "compression_loss": 0.0, "distillation_loss": 0.1019824743270874, "epoch": 2.59, "learning_rate": 3.495089072094898e-05, "loss": 0.0978, "step": 2728, "task_loss": 0.06048191338777542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7690860238627558, "compression_loss": 0.0, "distillation_loss": 0.021417532116174698, "epoch": 2.59, "learning_rate": 3.494111501510522e-05, "loss": 0.0293, "step": 2729, "task_loss": 0.10010670870542526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7691023029441155, "compression_loss": 0.0, "distillation_loss": 0.04595291242003441, "epoch": 2.59, "learning_rate": 3.4931337503410034e-05, "loss": 0.0618, "step": 2730, "task_loss": 0.20489037036895752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7691185763095056, "compression_loss": 0.0, "distillation_loss": 0.07521625608205795, "epoch": 2.59, "learning_rate": 3.4921558187639556e-05, "loss": 0.0711, "step": 2731, "task_loss": 0.0338003970682621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76913484395993, "compression_loss": 0.0, "distillation_loss": 0.15600326657295227, "epoch": 2.59, "learning_rate": 3.491177706957026e-05, "loss": 0.1498, "step": 2732, "task_loss": 0.0938263088464737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7691511058963922, "compression_loss": 0.0, "distillation_loss": 0.19931530952453613, "epoch": 2.6, "learning_rate": 3.490199415097892e-05, "loss": 0.1947, "step": 2733, "task_loss": 0.1532270908355713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.769167362119896, "compression_loss": 0.0, "distillation_loss": 0.08603714406490326, "epoch": 2.6, "learning_rate": 3.489220943364266e-05, "loss": 0.0839, "step": 2734, "task_loss": 0.06482724845409393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7691836126314449, "compression_loss": 0.0, "distillation_loss": 0.0459892675280571, "epoch": 2.6, "learning_rate": 3.488242291933891e-05, "loss": 0.0517, "step": 2735, "task_loss": 0.10307285934686661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7691998574320427, "compression_loss": 0.0, "distillation_loss": 0.09086348861455917, "epoch": 2.6, "learning_rate": 3.487263460984544e-05, "loss": 0.0902, "step": 2736, "task_loss": 0.08435454964637756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7692160965226931, "compression_loss": 0.0, "distillation_loss": 0.14168088138103485, "epoch": 2.6, "learning_rate": 3.486284450694035e-05, "loss": 0.1393, "step": 2737, "task_loss": 0.11769342422485352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7692323299043997, "compression_loss": 0.0, "distillation_loss": 0.06477680802345276, "epoch": 2.6, "learning_rate": 3.485305261240205e-05, "loss": 0.0647, "step": 2738, "task_loss": 0.06358174979686737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7692485575781663, "compression_loss": 0.0, "distillation_loss": 0.10200376808643341, "epoch": 2.6, "learning_rate": 3.4843258928009294e-05, "loss": 0.1103, "step": 2739, "task_loss": 0.18466830253601074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7692647795449965, "compression_loss": 0.0, "distillation_loss": 0.12464478611946106, "epoch": 2.6, "learning_rate": 3.4833463455541146e-05, "loss": 0.1179, "step": 2740, "task_loss": 0.05756617337465286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.769280995805894, "compression_loss": 0.0, "distillation_loss": 0.38097622990608215, "epoch": 2.6, "learning_rate": 3.4823666196777006e-05, "loss": 0.3663, "step": 2741, "task_loss": 0.23438085615634918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7692972063618625, "compression_loss": 0.0, "distillation_loss": 0.09649477154016495, "epoch": 2.6, "learning_rate": 3.481386715349659e-05, "loss": 0.0907, "step": 2742, "task_loss": 0.03846879303455353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7693134112139056, "compression_loss": 0.0, "distillation_loss": 0.049957577139139175, "epoch": 2.6, "learning_rate": 3.4804066327479934e-05, "loss": 0.0601, "step": 2743, "task_loss": 0.15115505456924438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7693296103630272, "compression_loss": 0.0, "distillation_loss": 0.06845077127218246, "epoch": 2.61, "learning_rate": 3.4794263720507427e-05, "loss": 0.071, "step": 2744, "task_loss": 0.09412077814340591 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7693458038102307, "compression_loss": 0.0, "distillation_loss": 0.09953967481851578, "epoch": 2.61, "learning_rate": 3.478445933435973e-05, "loss": 0.1021, "step": 2745, "task_loss": 0.1255946159362793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.76936199155652, "compression_loss": 0.0, "distillation_loss": 0.04850146174430847, "epoch": 2.61, "learning_rate": 3.4774653170817884e-05, "loss": 0.0495, "step": 2746, "task_loss": 0.0581977404654026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7693781736028985, "compression_loss": 0.0, "distillation_loss": 0.06344247609376907, "epoch": 2.61, "learning_rate": 3.4764845231663205e-05, "loss": 0.0654, "step": 2747, "task_loss": 0.0828084945678711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7693943499503703, "compression_loss": 0.0, "distillation_loss": 0.16078636050224304, "epoch": 2.61, "learning_rate": 3.475503551867737e-05, "loss": 0.1636, "step": 2748, "task_loss": 0.1886562705039978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7694105205999388, "compression_loss": 0.0, "distillation_loss": 0.12109316885471344, "epoch": 2.61, "learning_rate": 3.474522403364235e-05, "loss": 0.1212, "step": 2749, "task_loss": 0.12180116772651672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7694266855526076, "compression_loss": 0.0, "distillation_loss": 0.11483351141214371, "epoch": 2.61, "learning_rate": 3.473541077834045e-05, "loss": 0.1184, "step": 2750, "task_loss": 0.1501813381910324 }, { "epoch": 2.61, "eval_accuracy": 0.893348623853211, "eval_loss": 0.4236961901187897, "eval_runtime": 18.2679, "eval_samples_per_second": 47.734, "eval_steps_per_second": 5.967, "step": 2750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7694428448093806, "compression_loss": 0.0, "distillation_loss": 0.11966343224048615, "epoch": 2.61, "learning_rate": 3.4725595754554295e-05, "loss": 0.1252, "step": 2751, "task_loss": 0.17513278126716614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7694589983712615, "compression_loss": 0.0, "distillation_loss": 0.10641565918922424, "epoch": 2.61, "learning_rate": 3.471577896406683e-05, "loss": 0.1126, "step": 2752, "task_loss": 0.1684948056936264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7694751462392538, "compression_loss": 0.0, "distillation_loss": 0.08618808537721634, "epoch": 2.61, "learning_rate": 3.470596040866133e-05, "loss": 0.0846, "step": 2753, "task_loss": 0.07057564705610275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7694912884143613, "compression_loss": 0.0, "distillation_loss": 0.05224674940109253, "epoch": 2.62, "learning_rate": 3.4696140090121376e-05, "loss": 0.0487, "step": 2754, "task_loss": 0.016593074426054955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7695074248975876, "compression_loss": 0.0, "distillation_loss": 0.1727830171585083, "epoch": 2.62, "learning_rate": 3.468631801023088e-05, "loss": 0.168, "step": 2755, "task_loss": 0.12460935115814209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7695235556899365, "compression_loss": 0.0, "distillation_loss": 0.04192081093788147, "epoch": 2.62, "learning_rate": 3.467649417077406e-05, "loss": 0.0526, "step": 2756, "task_loss": 0.14908772706985474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7695396807924116, "compression_loss": 0.0, "distillation_loss": 0.13823646306991577, "epoch": 2.62, "learning_rate": 3.466666857353547e-05, "loss": 0.1403, "step": 2757, "task_loss": 0.15889990329742432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7695558002060165, "compression_loss": 0.0, "distillation_loss": 0.08324067294597626, "epoch": 2.62, "learning_rate": 3.465684122029999e-05, "loss": 0.0865, "step": 2758, "task_loss": 0.11616663634777069 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7695719139317552, "compression_loss": 0.0, "distillation_loss": 0.18361113965511322, "epoch": 2.62, "learning_rate": 3.464701211285279e-05, "loss": 0.1828, "step": 2759, "task_loss": 0.1751021444797516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7695880219706309, "compression_loss": 0.0, "distillation_loss": 0.038572054356336594, "epoch": 2.62, "learning_rate": 3.463718125297937e-05, "loss": 0.0367, "step": 2760, "task_loss": 0.020345257595181465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7696041243236478, "compression_loss": 0.0, "distillation_loss": 0.11891628056764603, "epoch": 2.62, "learning_rate": 3.462734864246557e-05, "loss": 0.1142, "step": 2761, "task_loss": 0.07189692556858063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7696202209918092, "compression_loss": 0.0, "distillation_loss": 0.0748853087425232, "epoch": 2.62, "learning_rate": 3.4617514283097524e-05, "loss": 0.0825, "step": 2762, "task_loss": 0.1514107882976532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7696363119761188, "compression_loss": 0.0, "distillation_loss": 0.17334090173244476, "epoch": 2.62, "learning_rate": 3.4607678176661695e-05, "loss": 0.1803, "step": 2763, "task_loss": 0.2433452010154724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7696523972775805, "compression_loss": 0.0, "distillation_loss": 0.08427559584379196, "epoch": 2.62, "learning_rate": 3.459784032494484e-05, "loss": 0.1008, "step": 2764, "task_loss": 0.24964390695095062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7696684768971979, "compression_loss": 0.0, "distillation_loss": 0.04236322641372681, "epoch": 2.63, "learning_rate": 3.458800072973408e-05, "loss": 0.0393, "step": 2765, "task_loss": 0.011962385848164558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7696845508359745, "compression_loss": 0.0, "distillation_loss": 0.04558961093425751, "epoch": 2.63, "learning_rate": 3.45781593928168e-05, "loss": 0.051, "step": 2766, "task_loss": 0.09958979487419128 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697006190949143, "compression_loss": 0.0, "distillation_loss": 0.05856577679514885, "epoch": 2.63, "learning_rate": 3.4568316315980745e-05, "loss": 0.0557, "step": 2767, "task_loss": 0.029869040474295616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697166816750208, "compression_loss": 0.0, "distillation_loss": 0.14246824383735657, "epoch": 2.63, "learning_rate": 3.455847150101395e-05, "loss": 0.1396, "step": 2768, "task_loss": 0.11401065438985825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697327385772976, "compression_loss": 0.0, "distillation_loss": 0.08908317983150482, "epoch": 2.63, "learning_rate": 3.454862494970477e-05, "loss": 0.1026, "step": 2769, "task_loss": 0.22423215210437775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697487898027486, "compression_loss": 0.0, "distillation_loss": 0.11774835735559464, "epoch": 2.63, "learning_rate": 3.4538776663841875e-05, "loss": 0.1259, "step": 2770, "task_loss": 0.19950447976589203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697648353523773, "compression_loss": 0.0, "distillation_loss": 0.07487460970878601, "epoch": 2.63, "learning_rate": 3.452892664521427e-05, "loss": 0.0828, "step": 2771, "task_loss": 0.15448282659053802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697808752271874, "compression_loss": 0.0, "distillation_loss": 0.13396842777729034, "epoch": 2.63, "learning_rate": 3.4519074895611244e-05, "loss": 0.1337, "step": 2772, "task_loss": 0.13105513155460358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7697969094281827, "compression_loss": 0.0, "distillation_loss": 0.03769555687904358, "epoch": 2.63, "learning_rate": 3.4509221416822415e-05, "loss": 0.0348, "step": 2773, "task_loss": 0.00889505073428154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7698129379563668, "compression_loss": 0.0, "distillation_loss": 0.1526089906692505, "epoch": 2.63, "learning_rate": 3.4499366210637725e-05, "loss": 0.1464, "step": 2774, "task_loss": 0.09016816318035126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7698289608127434, "compression_loss": 0.0, "distillation_loss": 0.05517404526472092, "epoch": 2.64, "learning_rate": 3.4489509278847414e-05, "loss": 0.0691, "step": 2775, "task_loss": 0.19403240084648132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7698449779983162, "compression_loss": 0.0, "distillation_loss": 0.14141535758972168, "epoch": 2.64, "learning_rate": 3.4479650623242036e-05, "loss": 0.1409, "step": 2776, "task_loss": 0.13665583729743958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7698609895140888, "compression_loss": 0.0, "distillation_loss": 0.1828983724117279, "epoch": 2.64, "learning_rate": 3.446979024561246e-05, "loss": 0.1816, "step": 2777, "task_loss": 0.1703716665506363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7698769953610649, "compression_loss": 0.0, "distillation_loss": 0.031704775989055634, "epoch": 2.64, "learning_rate": 3.44599281477499e-05, "loss": 0.0384, "step": 2778, "task_loss": 0.098890021443367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7698929955402484, "compression_loss": 0.0, "distillation_loss": 0.03340402618050575, "epoch": 2.64, "learning_rate": 3.4450064331445814e-05, "loss": 0.0319, "step": 2779, "task_loss": 0.018638523295521736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7699089900526426, "compression_loss": 0.0, "distillation_loss": 0.040086835622787476, "epoch": 2.64, "learning_rate": 3.444019879849204e-05, "loss": 0.048, "step": 2780, "task_loss": 0.11879396438598633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7699249788992515, "compression_loss": 0.0, "distillation_loss": 0.05169200897216797, "epoch": 2.64, "learning_rate": 3.443033155068069e-05, "loss": 0.0542, "step": 2781, "task_loss": 0.07657624781131744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7699409620810785, "compression_loss": 0.0, "distillation_loss": 0.20420724153518677, "epoch": 2.64, "learning_rate": 3.442046258980419e-05, "loss": 0.1945, "step": 2782, "task_loss": 0.10754656046628952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7699569395991277, "compression_loss": 0.0, "distillation_loss": 0.1341618299484253, "epoch": 2.64, "learning_rate": 3.4410591917655296e-05, "loss": 0.1326, "step": 2783, "task_loss": 0.1190015971660614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7699729114544024, "compression_loss": 0.0, "distillation_loss": 0.09340974688529968, "epoch": 2.64, "learning_rate": 3.4400719536027056e-05, "loss": 0.0875, "step": 2784, "task_loss": 0.03399471938610077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7699888776479064, "compression_loss": 0.0, "distillation_loss": 0.19588154554367065, "epoch": 2.64, "learning_rate": 3.4390845446712836e-05, "loss": 0.187, "step": 2785, "task_loss": 0.10719942301511765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7700048381806435, "compression_loss": 0.0, "distillation_loss": 0.11635538935661316, "epoch": 2.65, "learning_rate": 3.438096965150632e-05, "loss": 0.1287, "step": 2786, "task_loss": 0.2399107664823532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7700207930536171, "compression_loss": 0.0, "distillation_loss": 0.17617712914943695, "epoch": 2.65, "learning_rate": 3.4371092152201485e-05, "loss": 0.1735, "step": 2787, "task_loss": 0.14985665678977966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7700367422678311, "compression_loss": 0.0, "distillation_loss": 0.06373878568410873, "epoch": 2.65, "learning_rate": 3.4361212950592624e-05, "loss": 0.0669, "step": 2788, "task_loss": 0.09571292251348495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7700526858242893, "compression_loss": 0.0, "distillation_loss": 0.05058780312538147, "epoch": 2.65, "learning_rate": 3.435133204847435e-05, "loss": 0.0469, "step": 2789, "task_loss": 0.013910744339227676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7700686237239951, "compression_loss": 0.0, "distillation_loss": 0.10628429800271988, "epoch": 2.65, "learning_rate": 3.4341449447641575e-05, "loss": 0.1091, "step": 2790, "task_loss": 0.13484588265419006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7700845559679523, "compression_loss": 0.0, "distillation_loss": 0.024575646966695786, "epoch": 2.65, "learning_rate": 3.433156514988951e-05, "loss": 0.0287, "step": 2791, "task_loss": 0.06540574878454208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7701004825571647, "compression_loss": 0.0, "distillation_loss": 0.11763833463191986, "epoch": 2.65, "learning_rate": 3.432167915701371e-05, "loss": 0.1207, "step": 2792, "task_loss": 0.14855128526687622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7701164034926358, "compression_loss": 0.0, "distillation_loss": 0.04107709601521492, "epoch": 2.65, "learning_rate": 3.431179147080999e-05, "loss": 0.0611, "step": 2793, "task_loss": 0.24124276638031006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7701323187753693, "compression_loss": 0.0, "distillation_loss": 0.03191131353378296, "epoch": 2.65, "learning_rate": 3.4301902093074504e-05, "loss": 0.0301, "step": 2794, "task_loss": 0.013331804424524307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.770148228406369, "compression_loss": 0.0, "distillation_loss": 0.029843294993042946, "epoch": 2.65, "learning_rate": 3.42920110256037e-05, "loss": 0.0277, "step": 2795, "task_loss": 0.008593736216425896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7701641323866384, "compression_loss": 0.0, "distillation_loss": 0.05394424870610237, "epoch": 2.66, "learning_rate": 3.428211827019434e-05, "loss": 0.0504, "step": 2796, "task_loss": 0.018413875252008438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7701800307171814, "compression_loss": 0.0, "distillation_loss": 0.12455835193395615, "epoch": 2.66, "learning_rate": 3.42722238286435e-05, "loss": 0.1222, "step": 2797, "task_loss": 0.10132066905498505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7701959233990016, "compression_loss": 0.0, "distillation_loss": 0.036928869783878326, "epoch": 2.66, "learning_rate": 3.426232770274855e-05, "loss": 0.0458, "step": 2798, "task_loss": 0.12519891560077667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7702118104331026, "compression_loss": 0.0, "distillation_loss": 0.03775065019726753, "epoch": 2.66, "learning_rate": 3.4252429894307154e-05, "loss": 0.0426, "step": 2799, "task_loss": 0.08613856136798859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7702276918204881, "compression_loss": 0.0, "distillation_loss": 0.14951038360595703, "epoch": 2.66, "learning_rate": 3.424253040511731e-05, "loss": 0.1439, "step": 2800, "task_loss": 0.09322544187307358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.770243567562162, "compression_loss": 0.0, "distillation_loss": 0.14305658638477325, "epoch": 2.66, "learning_rate": 3.4232629236977316e-05, "loss": 0.1513, "step": 2801, "task_loss": 0.2257470041513443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7702594376591276, "compression_loss": 0.0, "distillation_loss": 0.08632157742977142, "epoch": 2.66, "learning_rate": 3.4222726391685746e-05, "loss": 0.0854, "step": 2802, "task_loss": 0.07660988718271255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7702753021123889, "compression_loss": 0.0, "distillation_loss": 0.12226057052612305, "epoch": 2.66, "learning_rate": 3.42128218710415e-05, "loss": 0.1323, "step": 2803, "task_loss": 0.22227300703525543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7702911609229496, "compression_loss": 0.0, "distillation_loss": 0.05076700448989868, "epoch": 2.66, "learning_rate": 3.420291567684381e-05, "loss": 0.0488, "step": 2804, "task_loss": 0.03137904405593872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7703070140918131, "compression_loss": 0.0, "distillation_loss": 0.13917645812034607, "epoch": 2.66, "learning_rate": 3.419300781089216e-05, "loss": 0.1431, "step": 2805, "task_loss": 0.17885732650756836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7703228616199832, "compression_loss": 0.0, "distillation_loss": 0.047890555113554, "epoch": 2.66, "learning_rate": 3.418309827498637e-05, "loss": 0.053, "step": 2806, "task_loss": 0.09856240451335907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7703387035084637, "compression_loss": 0.0, "distillation_loss": 0.019215038046240807, "epoch": 2.67, "learning_rate": 3.4173187070926546e-05, "loss": 0.0224, "step": 2807, "task_loss": 0.05135765299201012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7703545397582582, "compression_loss": 0.0, "distillation_loss": 0.07260240614414215, "epoch": 2.67, "learning_rate": 3.4163274200513116e-05, "loss": 0.0875, "step": 2808, "task_loss": 0.221457377076149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7703703703703704, "compression_loss": 0.0, "distillation_loss": 0.06960202753543854, "epoch": 2.67, "learning_rate": 3.415335966554679e-05, "loss": 0.0853, "step": 2809, "task_loss": 0.22612322866916656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7703861953458039, "compression_loss": 0.0, "distillation_loss": 0.0772671177983284, "epoch": 2.67, "learning_rate": 3.414344346782861e-05, "loss": 0.0818, "step": 2810, "task_loss": 0.12265747040510178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7704020146855625, "compression_loss": 0.0, "distillation_loss": 0.16112419962882996, "epoch": 2.67, "learning_rate": 3.413352560915988e-05, "loss": 0.1535, "step": 2811, "task_loss": 0.0847577229142189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7704178283906499, "compression_loss": 0.0, "distillation_loss": 0.23741042613983154, "epoch": 2.67, "learning_rate": 3.412360609134223e-05, "loss": 0.2338, "step": 2812, "task_loss": 0.20153185725212097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7704336364620695, "compression_loss": 0.0, "distillation_loss": 0.10814424604177475, "epoch": 2.67, "learning_rate": 3.411368491617761e-05, "loss": 0.1036, "step": 2813, "task_loss": 0.06266738474369049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7704494389008254, "compression_loss": 0.0, "distillation_loss": 0.06540797650814056, "epoch": 2.67, "learning_rate": 3.410376208546822e-05, "loss": 0.0606, "step": 2814, "task_loss": 0.017390571534633636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7704652357079209, "compression_loss": 0.0, "distillation_loss": 0.033819716423749924, "epoch": 2.67, "learning_rate": 3.409383760101661e-05, "loss": 0.0382, "step": 2815, "task_loss": 0.07716642320156097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.77048102688436, "compression_loss": 0.0, "distillation_loss": 0.20797955989837646, "epoch": 2.67, "learning_rate": 3.4083911464625596e-05, "loss": 0.1993, "step": 2816, "task_loss": 0.12118206918239594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7704968124311462, "compression_loss": 0.0, "distillation_loss": 0.13762623071670532, "epoch": 2.68, "learning_rate": 3.407398367809832e-05, "loss": 0.1356, "step": 2817, "task_loss": 0.11715132743120193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7705125923492833, "compression_loss": 0.0, "distillation_loss": 0.14595434069633484, "epoch": 2.68, "learning_rate": 3.406405424323821e-05, "loss": 0.1374, "step": 2818, "task_loss": 0.06045624241232872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7705283666397748, "compression_loss": 0.0, "distillation_loss": 0.030591927468776703, "epoch": 2.68, "learning_rate": 3.4054123161848995e-05, "loss": 0.0285, "step": 2819, "task_loss": 0.010143185034394264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7705441353036244, "compression_loss": 0.0, "distillation_loss": 0.21557293832302094, "epoch": 2.68, "learning_rate": 3.4044190435734695e-05, "loss": 0.2094, "step": 2820, "task_loss": 0.15403705835342407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.770559898341836, "compression_loss": 0.0, "distillation_loss": 0.04488247632980347, "epoch": 2.68, "learning_rate": 3.403425606669965e-05, "loss": 0.0427, "step": 2821, "task_loss": 0.023530708625912666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7705756557554131, "compression_loss": 0.0, "distillation_loss": 0.1816938817501068, "epoch": 2.68, "learning_rate": 3.4024320056548475e-05, "loss": 0.174, "step": 2822, "task_loss": 0.10455642640590668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7705914075453595, "compression_loss": 0.0, "distillation_loss": 0.03927179425954819, "epoch": 2.68, "learning_rate": 3.401438240708611e-05, "loss": 0.0501, "step": 2823, "task_loss": 0.14788560569286346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7706071537126787, "compression_loss": 0.0, "distillation_loss": 0.1478666365146637, "epoch": 2.68, "learning_rate": 3.400444312011776e-05, "loss": 0.1539, "step": 2824, "task_loss": 0.20857636630535126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7706228942583746, "compression_loss": 0.0, "distillation_loss": 0.1341654360294342, "epoch": 2.68, "learning_rate": 3.399450219744896e-05, "loss": 0.1379, "step": 2825, "task_loss": 0.17152555286884308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7706386291834507, "compression_loss": 0.0, "distillation_loss": 0.09930291771888733, "epoch": 2.68, "learning_rate": 3.3984559640885505e-05, "loss": 0.1029, "step": 2826, "task_loss": 0.1356475055217743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7706543584889108, "compression_loss": 0.0, "distillation_loss": 0.049742672592401505, "epoch": 2.68, "learning_rate": 3.3974615452233526e-05, "loss": 0.0463, "step": 2827, "task_loss": 0.015741702169179916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7706700821757586, "compression_loss": 0.0, "distillation_loss": 0.047701530158519745, "epoch": 2.69, "learning_rate": 3.396466963329944e-05, "loss": 0.0506, "step": 2828, "task_loss": 0.0768967792391777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7706858002449977, "compression_loss": 0.0, "distillation_loss": 0.14289794862270355, "epoch": 2.69, "learning_rate": 3.395472218588992e-05, "loss": 0.1379, "step": 2829, "task_loss": 0.09291721880435944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707015126976318, "compression_loss": 0.0, "distillation_loss": 0.11319032311439514, "epoch": 2.69, "learning_rate": 3.394477311181201e-05, "loss": 0.1067, "step": 2830, "task_loss": 0.04876325652003288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707172195346645, "compression_loss": 0.0, "distillation_loss": 0.14416098594665527, "epoch": 2.69, "learning_rate": 3.393482241287297e-05, "loss": 0.1571, "step": 2831, "task_loss": 0.2737889587879181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707329207570998, "compression_loss": 0.0, "distillation_loss": 0.1706182062625885, "epoch": 2.69, "learning_rate": 3.392487009088041e-05, "loss": 0.1614, "step": 2832, "task_loss": 0.07805980741977692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707486163659409, "compression_loss": 0.0, "distillation_loss": 0.14137789607048035, "epoch": 2.69, "learning_rate": 3.391491614764222e-05, "loss": 0.1342, "step": 2833, "task_loss": 0.07003333419561386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707643063621918, "compression_loss": 0.0, "distillation_loss": 0.09552104771137238, "epoch": 2.69, "learning_rate": 3.390496058496657e-05, "loss": 0.1013, "step": 2834, "task_loss": 0.15300557017326355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707799907468562, "compression_loss": 0.0, "distillation_loss": 0.1020299419760704, "epoch": 2.69, "learning_rate": 3.3895003404661955e-05, "loss": 0.1235, "step": 2835, "task_loss": 0.3162316679954529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7707956695209377, "compression_loss": 0.0, "distillation_loss": 0.10217801481485367, "epoch": 2.69, "learning_rate": 3.3885044608537125e-05, "loss": 0.1284, "step": 2836, "task_loss": 0.3640749156475067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7708113426854399, "compression_loss": 0.0, "distillation_loss": 0.09118107706308365, "epoch": 2.69, "learning_rate": 3.387508419840115e-05, "loss": 0.0858, "step": 2837, "task_loss": 0.03702104091644287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7708270102413666, "compression_loss": 0.0, "distillation_loss": 0.123194620013237, "epoch": 2.7, "learning_rate": 3.386512217606339e-05, "loss": 0.1161, "step": 2838, "task_loss": 0.05249645560979843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7708426721897215, "compression_loss": 0.0, "distillation_loss": 0.0807013064622879, "epoch": 2.7, "learning_rate": 3.385515854333349e-05, "loss": 0.0786, "step": 2839, "task_loss": 0.06016937270760536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7708583285315082, "compression_loss": 0.0, "distillation_loss": 0.08974245935678482, "epoch": 2.7, "learning_rate": 3.38451933020214e-05, "loss": 0.0873, "step": 2840, "task_loss": 0.0655125230550766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7708739792677303, "compression_loss": 0.0, "distillation_loss": 0.18268810212612152, "epoch": 2.7, "learning_rate": 3.383522645393734e-05, "loss": 0.1749, "step": 2841, "task_loss": 0.10475851595401764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7708896243993917, "compression_loss": 0.0, "distillation_loss": 0.23169159889221191, "epoch": 2.7, "learning_rate": 3.3825258000891846e-05, "loss": 0.2429, "step": 2842, "task_loss": 0.34396815299987793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7709052639274959, "compression_loss": 0.0, "distillation_loss": 0.13757774233818054, "epoch": 2.7, "learning_rate": 3.381528794469574e-05, "loss": 0.1312, "step": 2843, "task_loss": 0.07409615814685822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7709208978530467, "compression_loss": 0.0, "distillation_loss": 0.029491236433386803, "epoch": 2.7, "learning_rate": 3.380531628716012e-05, "loss": 0.0344, "step": 2844, "task_loss": 0.07904480397701263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7709365261770478, "compression_loss": 0.0, "distillation_loss": 0.08474864065647125, "epoch": 2.7, "learning_rate": 3.3795343030096384e-05, "loss": 0.0934, "step": 2845, "task_loss": 0.17167925834655762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7709521489005028, "compression_loss": 0.0, "distillation_loss": 0.03882834315299988, "epoch": 2.7, "learning_rate": 3.3785368175316226e-05, "loss": 0.0392, "step": 2846, "task_loss": 0.042411088943481445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7709677660244153, "compression_loss": 0.0, "distillation_loss": 0.04816707968711853, "epoch": 2.7, "learning_rate": 3.377539172463164e-05, "loss": 0.0444, "step": 2847, "task_loss": 0.010003169998526573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7709833775497892, "compression_loss": 0.0, "distillation_loss": 0.09748025238513947, "epoch": 2.7, "learning_rate": 3.376541367985488e-05, "loss": 0.1011, "step": 2848, "task_loss": 0.13417553901672363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.770998983477628, "compression_loss": 0.0, "distillation_loss": 0.032245099544525146, "epoch": 2.71, "learning_rate": 3.3755434042798506e-05, "loss": 0.0298, "step": 2849, "task_loss": 0.008108781650662422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7710145838089355, "compression_loss": 0.0, "distillation_loss": 0.028734374791383743, "epoch": 2.71, "learning_rate": 3.374545281527538e-05, "loss": 0.0302, "step": 2850, "task_loss": 0.043056122958660126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7710301785447152, "compression_loss": 0.0, "distillation_loss": 0.04646429419517517, "epoch": 2.71, "learning_rate": 3.3735469999098615e-05, "loss": 0.0477, "step": 2851, "task_loss": 0.05848044529557228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7710457676859711, "compression_loss": 0.0, "distillation_loss": 0.09431092441082001, "epoch": 2.71, "learning_rate": 3.372548559608166e-05, "loss": 0.101, "step": 2852, "task_loss": 0.16074809432029724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7710613512337066, "compression_loss": 0.0, "distillation_loss": 0.042417608201503754, "epoch": 2.71, "learning_rate": 3.3715499608038234e-05, "loss": 0.0391, "step": 2853, "task_loss": 0.009729046374559402 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7710769291889255, "compression_loss": 0.0, "distillation_loss": 0.10295215994119644, "epoch": 2.71, "learning_rate": 3.370551203678233e-05, "loss": 0.0966, "step": 2854, "task_loss": 0.03992331400513649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7710925015526314, "compression_loss": 0.0, "distillation_loss": 0.026899177581071854, "epoch": 2.71, "learning_rate": 3.369552288412822e-05, "loss": 0.0407, "step": 2855, "task_loss": 0.1650460809469223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7711080683258281, "compression_loss": 0.0, "distillation_loss": 0.09420739114284515, "epoch": 2.71, "learning_rate": 3.368553215189052e-05, "loss": 0.0965, "step": 2856, "task_loss": 0.1171763613820076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7711236295095193, "compression_loss": 0.0, "distillation_loss": 0.0723368227481842, "epoch": 2.71, "learning_rate": 3.367553984188407e-05, "loss": 0.0738, "step": 2857, "task_loss": 0.08675511181354523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7711391851047085, "compression_loss": 0.0, "distillation_loss": 0.10755441337823868, "epoch": 2.71, "learning_rate": 3.366554595592402e-05, "loss": 0.103, "step": 2858, "task_loss": 0.06218289956450462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7711547351123995, "compression_loss": 0.0, "distillation_loss": 0.11410737037658691, "epoch": 2.72, "learning_rate": 3.365555049582582e-05, "loss": 0.1191, "step": 2859, "task_loss": 0.16380539536476135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771170279533596, "compression_loss": 0.0, "distillation_loss": 0.09805843234062195, "epoch": 2.72, "learning_rate": 3.364555346340518e-05, "loss": 0.0936, "step": 2860, "task_loss": 0.053569987416267395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7711858183693016, "compression_loss": 0.0, "distillation_loss": 0.11211707442998886, "epoch": 2.72, "learning_rate": 3.3635554860478126e-05, "loss": 0.1062, "step": 2861, "task_loss": 0.053317755460739136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7712013516205201, "compression_loss": 0.0, "distillation_loss": 0.06892388314008713, "epoch": 2.72, "learning_rate": 3.362555468886093e-05, "loss": 0.0627, "step": 2862, "task_loss": 0.006360730156302452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771216879288255, "compression_loss": 0.0, "distillation_loss": 0.10119855403900146, "epoch": 2.72, "learning_rate": 3.361555295037019e-05, "loss": 0.0967, "step": 2863, "task_loss": 0.05646238476037979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7712324013735102, "compression_loss": 0.0, "distillation_loss": 0.10500767827033997, "epoch": 2.72, "learning_rate": 3.360554964682276e-05, "loss": 0.1036, "step": 2864, "task_loss": 0.09080617129802704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7712479178772893, "compression_loss": 0.0, "distillation_loss": 0.025210561230778694, "epoch": 2.72, "learning_rate": 3.359554478003579e-05, "loss": 0.0285, "step": 2865, "task_loss": 0.0582006499171257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771263428800596, "compression_loss": 0.0, "distillation_loss": 0.16816392540931702, "epoch": 2.72, "learning_rate": 3.358553835182673e-05, "loss": 0.1681, "step": 2866, "task_loss": 0.16722428798675537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7712789341444338, "compression_loss": 0.0, "distillation_loss": 0.054264236241579056, "epoch": 2.72, "learning_rate": 3.357553036401326e-05, "loss": 0.0504, "step": 2867, "task_loss": 0.015533113852143288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7712944339098066, "compression_loss": 0.0, "distillation_loss": 0.11492185294628143, "epoch": 2.72, "learning_rate": 3.356552081841341e-05, "loss": 0.1207, "step": 2868, "task_loss": 0.1728590577840805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771309928097718, "compression_loss": 0.0, "distillation_loss": 0.19074279069900513, "epoch": 2.72, "learning_rate": 3.355550971684545e-05, "loss": 0.191, "step": 2869, "task_loss": 0.19339722394943237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7713254167091717, "compression_loss": 0.0, "distillation_loss": 0.09511333703994751, "epoch": 2.73, "learning_rate": 3.3545497061127946e-05, "loss": 0.095, "step": 2870, "task_loss": 0.0938149243593216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7713408997451714, "compression_loss": 0.0, "distillation_loss": 0.03391757979989052, "epoch": 2.73, "learning_rate": 3.353548285307975e-05, "loss": 0.0316, "step": 2871, "task_loss": 0.011141767725348473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7713563772067207, "compression_loss": 0.0, "distillation_loss": 0.04854762554168701, "epoch": 2.73, "learning_rate": 3.352546709451998e-05, "loss": 0.076, "step": 2872, "task_loss": 0.3228394389152527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7713718490948234, "compression_loss": 0.0, "distillation_loss": 0.07784444838762283, "epoch": 2.73, "learning_rate": 3.351544978726805e-05, "loss": 0.0753, "step": 2873, "task_loss": 0.05243542417883873 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771387315410483, "compression_loss": 0.0, "distillation_loss": 0.050782375037670135, "epoch": 2.73, "learning_rate": 3.350543093314366e-05, "loss": 0.0468, "step": 2874, "task_loss": 0.01137268915772438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7714027761547034, "compression_loss": 0.0, "distillation_loss": 0.1053369790315628, "epoch": 2.73, "learning_rate": 3.349541053396678e-05, "loss": 0.109, "step": 2875, "task_loss": 0.14205503463745117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7714182313284882, "compression_loss": 0.0, "distillation_loss": 0.12449757009744644, "epoch": 2.73, "learning_rate": 3.348538859155766e-05, "loss": 0.1181, "step": 2876, "task_loss": 0.060163747519254684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771433680932841, "compression_loss": 0.0, "distillation_loss": 0.16609831154346466, "epoch": 2.73, "learning_rate": 3.347536510773684e-05, "loss": 0.1762, "step": 2877, "task_loss": 0.2674504816532135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7714491249687656, "compression_loss": 0.0, "distillation_loss": 0.015941990539431572, "epoch": 2.73, "learning_rate": 3.346534008432513e-05, "loss": 0.0148, "step": 2878, "task_loss": 0.004297057166695595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7714645634372655, "compression_loss": 0.0, "distillation_loss": 0.06580062955617905, "epoch": 2.73, "learning_rate": 3.3455313523143615e-05, "loss": 0.0644, "step": 2879, "task_loss": 0.05226878076791763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7714799963393447, "compression_loss": 0.0, "distillation_loss": 0.041852548718452454, "epoch": 2.74, "learning_rate": 3.3445285426013685e-05, "loss": 0.0441, "step": 2880, "task_loss": 0.0639595314860344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7714954236760067, "compression_loss": 0.0, "distillation_loss": 0.14709192514419556, "epoch": 2.74, "learning_rate": 3.343525579475698e-05, "loss": 0.1519, "step": 2881, "task_loss": 0.19560851156711578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771510845448255, "compression_loss": 0.0, "distillation_loss": 0.02442905679345131, "epoch": 2.74, "learning_rate": 3.342522463119543e-05, "loss": 0.0226, "step": 2882, "task_loss": 0.00585402175784111 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7715262616570936, "compression_loss": 0.0, "distillation_loss": 0.08210425078868866, "epoch": 2.74, "learning_rate": 3.341519193715127e-05, "loss": 0.087, "step": 2883, "task_loss": 0.13087433576583862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771541672303526, "compression_loss": 0.0, "distillation_loss": 0.021984929218888283, "epoch": 2.74, "learning_rate": 3.340515771444695e-05, "loss": 0.0207, "step": 2884, "task_loss": 0.008725512772798538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771557077388556, "compression_loss": 0.0, "distillation_loss": 0.1168016642332077, "epoch": 2.74, "learning_rate": 3.3395121964905265e-05, "loss": 0.1205, "step": 2885, "task_loss": 0.15377360582351685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771572476913187, "compression_loss": 0.0, "distillation_loss": 0.16023914515972137, "epoch": 2.74, "learning_rate": 3.338508469034922e-05, "loss": 0.1587, "step": 2886, "task_loss": 0.14454258978366852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7715878708784231, "compression_loss": 0.0, "distillation_loss": 0.05907361954450607, "epoch": 2.74, "learning_rate": 3.3375045892602176e-05, "loss": 0.0606, "step": 2887, "task_loss": 0.07458890229463577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716032592852676, "compression_loss": 0.0, "distillation_loss": 0.0668545514345169, "epoch": 2.74, "learning_rate": 3.3365005573487706e-05, "loss": 0.0705, "step": 2888, "task_loss": 0.10356783866882324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716186421347244, "compression_loss": 0.0, "distillation_loss": 0.052939146757125854, "epoch": 2.74, "learning_rate": 3.335496373482969e-05, "loss": 0.0487, "step": 2889, "task_loss": 0.010425196960568428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716340194277972, "compression_loss": 0.0, "distillation_loss": 0.022259045392274857, "epoch": 2.74, "learning_rate": 3.334492037845227e-05, "loss": 0.0272, "step": 2890, "task_loss": 0.07185941934585571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716493911654896, "compression_loss": 0.0, "distillation_loss": 0.05123762786388397, "epoch": 2.75, "learning_rate": 3.333487550617987e-05, "loss": 0.0698, "step": 2891, "task_loss": 0.2367219626903534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716647573488052, "compression_loss": 0.0, "distillation_loss": 0.18512699007987976, "epoch": 2.75, "learning_rate": 3.332482911983721e-05, "loss": 0.1761, "step": 2892, "task_loss": 0.09492430090904236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716801179787479, "compression_loss": 0.0, "distillation_loss": 0.18022358417510986, "epoch": 2.75, "learning_rate": 3.331478122124924e-05, "loss": 0.1795, "step": 2893, "task_loss": 0.1729545146226883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7716954730563212, "compression_loss": 0.0, "distillation_loss": 0.020589305087924004, "epoch": 2.75, "learning_rate": 3.330473181224121e-05, "loss": 0.0191, "step": 2894, "task_loss": 0.005505530163645744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7717108225825289, "compression_loss": 0.0, "distillation_loss": 0.030846502631902695, "epoch": 2.75, "learning_rate": 3.3294680894638655e-05, "loss": 0.0285, "step": 2895, "task_loss": 0.0070547014474868774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7717261665583746, "compression_loss": 0.0, "distillation_loss": 0.020820213481783867, "epoch": 2.75, "learning_rate": 3.328462847026736e-05, "loss": 0.0287, "step": 2896, "task_loss": 0.09968282282352448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7717415049848619, "compression_loss": 0.0, "distillation_loss": 0.09029317647218704, "epoch": 2.75, "learning_rate": 3.327457454095342e-05, "loss": 0.0924, "step": 2897, "task_loss": 0.11134281009435654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7717568378629946, "compression_loss": 0.0, "distillation_loss": 0.06361924111843109, "epoch": 2.75, "learning_rate": 3.3264519108523154e-05, "loss": 0.0595, "step": 2898, "task_loss": 0.022628160193562508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7717721651937766, "compression_loss": 0.0, "distillation_loss": 0.04663745313882828, "epoch": 2.75, "learning_rate": 3.3254462174803186e-05, "loss": 0.048, "step": 2899, "task_loss": 0.060523584485054016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7717874869782111, "compression_loss": 0.0, "distillation_loss": 0.23908516764640808, "epoch": 2.75, "learning_rate": 3.324440374162041e-05, "loss": 0.2375, "step": 2900, "task_loss": 0.22366894781589508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718028032173022, "compression_loss": 0.0, "distillation_loss": 0.07372735440731049, "epoch": 2.75, "learning_rate": 3.323434381080199e-05, "loss": 0.0802, "step": 2901, "task_loss": 0.13813573122024536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718181139120533, "compression_loss": 0.0, "distillation_loss": 0.04444555193185806, "epoch": 2.76, "learning_rate": 3.322428238417537e-05, "loss": 0.0548, "step": 2902, "task_loss": 0.1481703817844391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718334190634683, "compression_loss": 0.0, "distillation_loss": 0.10492336750030518, "epoch": 2.76, "learning_rate": 3.321421946356823e-05, "loss": 0.0998, "step": 2903, "task_loss": 0.05350606143474579 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718487186725508, "compression_loss": 0.0, "distillation_loss": 0.043171461671590805, "epoch": 2.76, "learning_rate": 3.320415505080858e-05, "loss": 0.0399, "step": 2904, "task_loss": 0.010258587077260017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718640127403044, "compression_loss": 0.0, "distillation_loss": 0.09338559955358505, "epoch": 2.76, "learning_rate": 3.3194089147724644e-05, "loss": 0.0905, "step": 2905, "task_loss": 0.0642678365111351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718793012677327, "compression_loss": 0.0, "distillation_loss": 0.07148714363574982, "epoch": 2.76, "learning_rate": 3.3184021756144954e-05, "loss": 0.0692, "step": 2906, "task_loss": 0.048625778406858444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7718945842558398, "compression_loss": 0.0, "distillation_loss": 0.053975023329257965, "epoch": 2.76, "learning_rate": 3.317395287789829e-05, "loss": 0.0522, "step": 2907, "task_loss": 0.03605438768863678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7719098617056289, "compression_loss": 0.0, "distillation_loss": 0.21236878633499146, "epoch": 2.76, "learning_rate": 3.316388251481373e-05, "loss": 0.2012, "step": 2908, "task_loss": 0.10068190097808838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771925133618104, "compression_loss": 0.0, "distillation_loss": 0.08693385124206543, "epoch": 2.76, "learning_rate": 3.3153810668720594e-05, "loss": 0.0805, "step": 2909, "task_loss": 0.022921759635210037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7719403999942687, "compression_loss": 0.0, "distillation_loss": 0.07356773316860199, "epoch": 2.76, "learning_rate": 3.3143737341448475e-05, "loss": 0.0745, "step": 2910, "task_loss": 0.08280356228351593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7719556608351267, "compression_loss": 0.0, "distillation_loss": 0.03629321604967117, "epoch": 2.76, "learning_rate": 3.3133662534827255e-05, "loss": 0.034, "step": 2911, "task_loss": 0.01335129514336586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7719709161416815, "compression_loss": 0.0, "distillation_loss": 0.08336710929870605, "epoch": 2.77, "learning_rate": 3.3123586250687055e-05, "loss": 0.078, "step": 2912, "task_loss": 0.02927432768046856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.771986165914937, "compression_loss": 0.0, "distillation_loss": 0.07923730462789536, "epoch": 2.77, "learning_rate": 3.311350849085829e-05, "loss": 0.0754, "step": 2913, "task_loss": 0.04064404219388962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7720014101558967, "compression_loss": 0.0, "distillation_loss": 0.1845068484544754, "epoch": 2.77, "learning_rate": 3.3103429257171635e-05, "loss": 0.1826, "step": 2914, "task_loss": 0.16558828949928284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7720166488655645, "compression_loss": 0.0, "distillation_loss": 0.03765151649713516, "epoch": 2.77, "learning_rate": 3.309334855145803e-05, "loss": 0.043, "step": 2915, "task_loss": 0.09107490628957748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7720318820449439, "compression_loss": 0.0, "distillation_loss": 0.1513894945383072, "epoch": 2.77, "learning_rate": 3.3083266375548675e-05, "loss": 0.1645, "step": 2916, "task_loss": 0.2826227843761444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7720471096950388, "compression_loss": 0.0, "distillation_loss": 0.10186254978179932, "epoch": 2.77, "learning_rate": 3.3073182731275064e-05, "loss": 0.098, "step": 2917, "task_loss": 0.06284405291080475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7720623318168525, "compression_loss": 0.0, "distillation_loss": 0.18332624435424805, "epoch": 2.77, "learning_rate": 3.306309762046892e-05, "loss": 0.1762, "step": 2918, "task_loss": 0.1121426373720169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7720775484113891, "compression_loss": 0.0, "distillation_loss": 0.0718914344906807, "epoch": 2.77, "learning_rate": 3.305301104496227e-05, "loss": 0.0692, "step": 2919, "task_loss": 0.04449700936675072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772092759479652, "compression_loss": 0.0, "distillation_loss": 0.19091036915779114, "epoch": 2.77, "learning_rate": 3.3042923006587366e-05, "loss": 0.1988, "step": 2920, "task_loss": 0.26946884393692017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772107965022645, "compression_loss": 0.0, "distillation_loss": 0.05884380638599396, "epoch": 2.77, "learning_rate": 3.303283350717678e-05, "loss": 0.0645, "step": 2921, "task_loss": 0.11569136381149292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7721231650413717, "compression_loss": 0.0, "distillation_loss": 0.06322423368692398, "epoch": 2.77, "learning_rate": 3.302274254856329e-05, "loss": 0.06, "step": 2922, "task_loss": 0.031089693307876587 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772138359536836, "compression_loss": 0.0, "distillation_loss": 0.13263660669326782, "epoch": 2.78, "learning_rate": 3.301265013257998e-05, "loss": 0.1316, "step": 2923, "task_loss": 0.12230206280946732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7721535485100413, "compression_loss": 0.0, "distillation_loss": 0.1299370527267456, "epoch": 2.78, "learning_rate": 3.300255626106019e-05, "loss": 0.1243, "step": 2924, "task_loss": 0.07319293916225433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7721687319619914, "compression_loss": 0.0, "distillation_loss": 0.024539334699511528, "epoch": 2.78, "learning_rate": 3.2992460935837505e-05, "loss": 0.0226, "step": 2925, "task_loss": 0.005514673888683319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.77218390989369, "compression_loss": 0.0, "distillation_loss": 0.14706680178642273, "epoch": 2.78, "learning_rate": 3.2982364158745805e-05, "loss": 0.1399, "step": 2926, "task_loss": 0.07497966289520264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7721990823061408, "compression_loss": 0.0, "distillation_loss": 0.07125681638717651, "epoch": 2.78, "learning_rate": 3.297226593161921e-05, "loss": 0.0669, "step": 2927, "task_loss": 0.027698028832674026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7722142492003474, "compression_loss": 0.0, "distillation_loss": 0.25317683815956116, "epoch": 2.78, "learning_rate": 3.2962166256292113e-05, "loss": 0.2522, "step": 2928, "task_loss": 0.24299074709415436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7722294105773136, "compression_loss": 0.0, "distillation_loss": 0.10056150704622269, "epoch": 2.78, "learning_rate": 3.295206513459917e-05, "loss": 0.0971, "step": 2929, "task_loss": 0.06555324792861938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772244566438043, "compression_loss": 0.0, "distillation_loss": 0.10626856982707977, "epoch": 2.78, "learning_rate": 3.2941962568375296e-05, "loss": 0.1043, "step": 2930, "task_loss": 0.08672097325325012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7722597167835393, "compression_loss": 0.0, "distillation_loss": 0.02176981046795845, "epoch": 2.78, "learning_rate": 3.2931858559455674e-05, "loss": 0.043, "step": 2931, "task_loss": 0.23452335596084595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7722748616148062, "compression_loss": 0.0, "distillation_loss": 0.26498720049858093, "epoch": 2.78, "learning_rate": 3.292175310967575e-05, "loss": 0.2554, "step": 2932, "task_loss": 0.16884511709213257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7722900009328473, "compression_loss": 0.0, "distillation_loss": 0.020644748583436012, "epoch": 2.79, "learning_rate": 3.291164622087122e-05, "loss": 0.0191, "step": 2933, "task_loss": 0.005039013922214508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723051347386664, "compression_loss": 0.0, "distillation_loss": 0.02696690335869789, "epoch": 2.79, "learning_rate": 3.290153789487804e-05, "loss": 0.0264, "step": 2934, "task_loss": 0.02141895517706871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723202630332671, "compression_loss": 0.0, "distillation_loss": 0.056153517216444016, "epoch": 2.79, "learning_rate": 3.289142813353246e-05, "loss": 0.0626, "step": 2935, "task_loss": 0.12072312086820602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723353858176532, "compression_loss": 0.0, "distillation_loss": 0.09314046055078506, "epoch": 2.79, "learning_rate": 3.2881316938670945e-05, "loss": 0.1048, "step": 2936, "task_loss": 0.20927327871322632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723505030928282, "compression_loss": 0.0, "distillation_loss": 0.02801085263490677, "epoch": 2.79, "learning_rate": 3.2871204312130254e-05, "loss": 0.0387, "step": 2937, "task_loss": 0.13507309556007385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723656148597958, "compression_loss": 0.0, "distillation_loss": 0.0866575613617897, "epoch": 2.79, "learning_rate": 3.28610902557474e-05, "loss": 0.0907, "step": 2938, "task_loss": 0.12749417126178741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723807211195599, "compression_loss": 0.0, "distillation_loss": 0.11423015594482422, "epoch": 2.79, "learning_rate": 3.285097477135963e-05, "loss": 0.1059, "step": 2939, "task_loss": 0.030799927189946175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7723958218731239, "compression_loss": 0.0, "distillation_loss": 0.10484655201435089, "epoch": 2.79, "learning_rate": 3.284085786080449e-05, "loss": 0.1022, "step": 2940, "task_loss": 0.07836197316646576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7724109171214917, "compression_loss": 0.0, "distillation_loss": 0.03331822156906128, "epoch": 2.79, "learning_rate": 3.283073952591975e-05, "loss": 0.0453, "step": 2941, "task_loss": 0.1528467983007431 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772426006865667, "compression_loss": 0.0, "distillation_loss": 0.15390589833259583, "epoch": 2.79, "learning_rate": 3.2820619768543473e-05, "loss": 0.1486, "step": 2942, "task_loss": 0.10106901824474335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7724410911066533, "compression_loss": 0.0, "distillation_loss": 0.02205563150346279, "epoch": 2.79, "learning_rate": 3.281049859051394e-05, "loss": 0.0202, "step": 2943, "task_loss": 0.003351377323269844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7724561698454543, "compression_loss": 0.0, "distillation_loss": 0.04081364721059799, "epoch": 2.8, "learning_rate": 3.280037599366972e-05, "loss": 0.039, "step": 2944, "task_loss": 0.022568656131625175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7724712430830738, "compression_loss": 0.0, "distillation_loss": 0.10027671605348587, "epoch": 2.8, "learning_rate": 3.2790251979849654e-05, "loss": 0.0971, "step": 2945, "task_loss": 0.06809482723474503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7724863108205153, "compression_loss": 0.0, "distillation_loss": 0.11592331528663635, "epoch": 2.8, "learning_rate": 3.278012655089277e-05, "loss": 0.1258, "step": 2946, "task_loss": 0.21512287855148315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725013730587829, "compression_loss": 0.0, "distillation_loss": 0.11402192711830139, "epoch": 2.8, "learning_rate": 3.276999970863845e-05, "loss": 0.1178, "step": 2947, "task_loss": 0.1516398787498474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725164297988798, "compression_loss": 0.0, "distillation_loss": 0.039229217916727066, "epoch": 2.8, "learning_rate": 3.275987145492625e-05, "loss": 0.0364, "step": 2948, "task_loss": 0.010563917458057404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725314810418099, "compression_loss": 0.0, "distillation_loss": 0.179963618516922, "epoch": 2.8, "learning_rate": 3.274974179159603e-05, "loss": 0.181, "step": 2949, "task_loss": 0.1902376413345337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725465267885768, "compression_loss": 0.0, "distillation_loss": 0.08978709578514099, "epoch": 2.8, "learning_rate": 3.27396107204879e-05, "loss": 0.0879, "step": 2950, "task_loss": 0.07117792963981628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725615670401843, "compression_loss": 0.0, "distillation_loss": 0.15075881779193878, "epoch": 2.8, "learning_rate": 3.2729478243442194e-05, "loss": 0.1489, "step": 2951, "task_loss": 0.13251779973506927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725766017976361, "compression_loss": 0.0, "distillation_loss": 0.07806205749511719, "epoch": 2.8, "learning_rate": 3.271934436229955e-05, "loss": 0.0817, "step": 2952, "task_loss": 0.11416235566139221 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7725916310619357, "compression_loss": 0.0, "distillation_loss": 0.23699919879436493, "epoch": 2.8, "learning_rate": 3.270920907890082e-05, "loss": 0.2343, "step": 2953, "task_loss": 0.21003299951553345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772606654834087, "compression_loss": 0.0, "distillation_loss": 0.09724032133817673, "epoch": 2.81, "learning_rate": 3.269907239508714e-05, "loss": 0.0913, "step": 2954, "task_loss": 0.037794340401887894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7726216731150934, "compression_loss": 0.0, "distillation_loss": 0.11641349643468857, "epoch": 2.81, "learning_rate": 3.268893431269987e-05, "loss": 0.1332, "step": 2955, "task_loss": 0.2846828103065491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7726366859059589, "compression_loss": 0.0, "distillation_loss": 0.13409125804901123, "epoch": 2.81, "learning_rate": 3.2678794833580654e-05, "loss": 0.1328, "step": 2956, "task_loss": 0.1211758628487587 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7726516932076869, "compression_loss": 0.0, "distillation_loss": 0.1196846067905426, "epoch": 2.81, "learning_rate": 3.2668653959571384e-05, "loss": 0.1139, "step": 2957, "task_loss": 0.06228470057249069 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7726666950212814, "compression_loss": 0.0, "distillation_loss": 0.12382517009973526, "epoch": 2.81, "learning_rate": 3.2658511692514184e-05, "loss": 0.1171, "step": 2958, "task_loss": 0.056750208139419556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7726816913477457, "compression_loss": 0.0, "distillation_loss": 0.07181763648986816, "epoch": 2.81, "learning_rate": 3.2648368034251454e-05, "loss": 0.073, "step": 2959, "task_loss": 0.0838204026222229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7726966821880837, "compression_loss": 0.0, "distillation_loss": 0.19776178896427155, "epoch": 2.81, "learning_rate": 3.263822298662583e-05, "loss": 0.1927, "step": 2960, "task_loss": 0.14683988690376282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7727116675432992, "compression_loss": 0.0, "distillation_loss": 0.1264166533946991, "epoch": 2.81, "learning_rate": 3.2628076551480216e-05, "loss": 0.1285, "step": 2961, "task_loss": 0.14740031957626343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7727266474143957, "compression_loss": 0.0, "distillation_loss": 0.18037302792072296, "epoch": 2.81, "learning_rate": 3.2617928730657764e-05, "loss": 0.189, "step": 2962, "task_loss": 0.2670821249485016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7727416218023769, "compression_loss": 0.0, "distillation_loss": 0.02856997214257717, "epoch": 2.81, "learning_rate": 3.260777952600186e-05, "loss": 0.0308, "step": 2963, "task_loss": 0.051173169165849686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7727565907082465, "compression_loss": 0.0, "distillation_loss": 0.20055626332759857, "epoch": 2.81, "learning_rate": 3.2597628939356175e-05, "loss": 0.1923, "step": 2964, "task_loss": 0.11836535483598709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7727715541330082, "compression_loss": 0.0, "distillation_loss": 0.10277973115444183, "epoch": 2.82, "learning_rate": 3.25874769725646e-05, "loss": 0.1018, "step": 2965, "task_loss": 0.09281755983829498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7727865120776657, "compression_loss": 0.0, "distillation_loss": 0.07278560847043991, "epoch": 2.82, "learning_rate": 3.257732362747129e-05, "loss": 0.0731, "step": 2966, "task_loss": 0.07589083909988403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7728014645432226, "compression_loss": 0.0, "distillation_loss": 0.05969984084367752, "epoch": 2.82, "learning_rate": 3.256716890592065e-05, "loss": 0.0624, "step": 2967, "task_loss": 0.08681820333003998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7728164115306826, "compression_loss": 0.0, "distillation_loss": 0.1937519758939743, "epoch": 2.82, "learning_rate": 3.255701280975733e-05, "loss": 0.1939, "step": 2968, "task_loss": 0.19502577185630798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7728313530410496, "compression_loss": 0.0, "distillation_loss": 0.14739632606506348, "epoch": 2.82, "learning_rate": 3.2546855340826246e-05, "loss": 0.1487, "step": 2969, "task_loss": 0.1602475941181183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7728462890753269, "compression_loss": 0.0, "distillation_loss": 0.16489112377166748, "epoch": 2.82, "learning_rate": 3.253669650097254e-05, "loss": 0.1558, "step": 2970, "task_loss": 0.07406702637672424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7728612196345185, "compression_loss": 0.0, "distillation_loss": 0.10662008821964264, "epoch": 2.82, "learning_rate": 3.2526536292041625e-05, "loss": 0.1111, "step": 2971, "task_loss": 0.1511189192533493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772876144719628, "compression_loss": 0.0, "distillation_loss": 0.0431804321706295, "epoch": 2.82, "learning_rate": 3.2516374715879126e-05, "loss": 0.0418, "step": 2972, "task_loss": 0.02895108424127102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7728910643316589, "compression_loss": 0.0, "distillation_loss": 0.1767425686120987, "epoch": 2.82, "learning_rate": 3.250621177433097e-05, "loss": 0.1788, "step": 2973, "task_loss": 0.197010338306427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.772905978471615, "compression_loss": 0.0, "distillation_loss": 0.12274418771266937, "epoch": 2.82, "learning_rate": 3.249604746924331e-05, "loss": 0.1189, "step": 2974, "task_loss": 0.08417470753192902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7729208871405002, "compression_loss": 0.0, "distillation_loss": 0.023153632879257202, "epoch": 2.83, "learning_rate": 3.248588180246251e-05, "loss": 0.0321, "step": 2975, "task_loss": 0.11221934109926224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7729357903393179, "compression_loss": 0.0, "distillation_loss": 0.24052870273590088, "epoch": 2.83, "learning_rate": 3.247571477583523e-05, "loss": 0.248, "step": 2976, "task_loss": 0.31512880325317383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7729506880690719, "compression_loss": 0.0, "distillation_loss": 0.11706255376338959, "epoch": 2.83, "learning_rate": 3.2465546391208355e-05, "loss": 0.1414, "step": 2977, "task_loss": 0.36023640632629395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7729655803307658, "compression_loss": 0.0, "distillation_loss": 0.24018515646457672, "epoch": 2.83, "learning_rate": 3.245537665042903e-05, "loss": 0.2329, "step": 2978, "task_loss": 0.1674792766571045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7729804671254034, "compression_loss": 0.0, "distillation_loss": 0.12150129675865173, "epoch": 2.83, "learning_rate": 3.244520555534463e-05, "loss": 0.1159, "step": 2979, "task_loss": 0.06535904854536057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7729953484539883, "compression_loss": 0.0, "distillation_loss": 0.09363529086112976, "epoch": 2.83, "learning_rate": 3.243503310780278e-05, "loss": 0.0944, "step": 2980, "task_loss": 0.10123631358146667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730102243175242, "compression_loss": 0.0, "distillation_loss": 0.060428209602832794, "epoch": 2.83, "learning_rate": 3.242485930965136e-05, "loss": 0.0578, "step": 2981, "task_loss": 0.03405189514160156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730250947170147, "compression_loss": 0.0, "distillation_loss": 0.03489404916763306, "epoch": 2.83, "learning_rate": 3.241468416273849e-05, "loss": 0.0514, "step": 2982, "task_loss": 0.2001369595527649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730399596534637, "compression_loss": 0.0, "distillation_loss": 0.026621265336871147, "epoch": 2.83, "learning_rate": 3.2404507668912534e-05, "loss": 0.0257, "step": 2983, "task_loss": 0.017897440120577812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730548191278748, "compression_loss": 0.0, "distillation_loss": 0.06251438707113266, "epoch": 2.83, "learning_rate": 3.2394329830022095e-05, "loss": 0.0658, "step": 2984, "task_loss": 0.0951443687081337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730696731412514, "compression_loss": 0.0, "distillation_loss": 0.06263174116611481, "epoch": 2.83, "learning_rate": 3.238415064791603e-05, "loss": 0.0604, "step": 2985, "task_loss": 0.03981752693653107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730845216945976, "compression_loss": 0.0, "distillation_loss": 0.04605867341160774, "epoch": 2.84, "learning_rate": 3.237397012444344e-05, "loss": 0.056, "step": 2986, "task_loss": 0.145818829536438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7730993647889168, "compression_loss": 0.0, "distillation_loss": 0.08241648226976395, "epoch": 2.84, "learning_rate": 3.2363788261453664e-05, "loss": 0.0888, "step": 2987, "task_loss": 0.14630846679210663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7731142024252129, "compression_loss": 0.0, "distillation_loss": 0.027548307552933693, "epoch": 2.84, "learning_rate": 3.2353605060796286e-05, "loss": 0.0253, "step": 2988, "task_loss": 0.004967987537384033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7731290346044893, "compression_loss": 0.0, "distillation_loss": 0.17513786256313324, "epoch": 2.84, "learning_rate": 3.2343420524321134e-05, "loss": 0.1683, "step": 2989, "task_loss": 0.1069210097193718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.77314386132775, "compression_loss": 0.0, "distillation_loss": 0.0309540294110775, "epoch": 2.84, "learning_rate": 3.2333234653878275e-05, "loss": 0.033, "step": 2990, "task_loss": 0.051752254366874695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7731586825959984, "compression_loss": 0.0, "distillation_loss": 0.07692794501781464, "epoch": 2.84, "learning_rate": 3.2323047451318023e-05, "loss": 0.0828, "step": 2991, "task_loss": 0.1354198455810547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7731734984102384, "compression_loss": 0.0, "distillation_loss": 0.23258818686008453, "epoch": 2.84, "learning_rate": 3.2312858918490936e-05, "loss": 0.2235, "step": 2992, "task_loss": 0.1417001634836197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7731883087714736, "compression_loss": 0.0, "distillation_loss": 0.07296988368034363, "epoch": 2.84, "learning_rate": 3.2302669057247806e-05, "loss": 0.0679, "step": 2993, "task_loss": 0.02189962938427925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7732031136807076, "compression_loss": 0.0, "distillation_loss": 0.13466548919677734, "epoch": 2.84, "learning_rate": 3.2292477869439666e-05, "loss": 0.1294, "step": 2994, "task_loss": 0.08172232657670975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7732179131389442, "compression_loss": 0.0, "distillation_loss": 0.1007700264453888, "epoch": 2.84, "learning_rate": 3.228228535691781e-05, "loss": 0.0913, "step": 2995, "task_loss": 0.0059202127158641815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.773232707147187, "compression_loss": 0.0, "distillation_loss": 0.13270485401153564, "epoch": 2.85, "learning_rate": 3.2272091521533745e-05, "loss": 0.1522, "step": 2996, "task_loss": 0.3279738426208496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7732474957064397, "compression_loss": 0.0, "distillation_loss": 0.09864410758018494, "epoch": 2.85, "learning_rate": 3.226189636513923e-05, "loss": 0.0946, "step": 2997, "task_loss": 0.05842628702521324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.773262278817706, "compression_loss": 0.0, "distillation_loss": 0.14679734408855438, "epoch": 2.85, "learning_rate": 3.225169988958627e-05, "loss": 0.1396, "step": 2998, "task_loss": 0.07466141879558563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7732770564819896, "compression_loss": 0.0, "distillation_loss": 0.09835858643054962, "epoch": 2.85, "learning_rate": 3.2241502096727095e-05, "loss": 0.0922, "step": 2999, "task_loss": 0.03713885694742203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7732918287002941, "compression_loss": 0.0, "distillation_loss": 0.08745696395635605, "epoch": 2.85, "learning_rate": 3.2231302988414194e-05, "loss": 0.0818, "step": 3000, "task_loss": 0.030404910445213318 }, { "epoch": 2.85, "eval_accuracy": 0.8899082568807339, "eval_loss": 0.41417789459228516, "eval_runtime": 17.9806, "eval_samples_per_second": 48.497, "eval_steps_per_second": 6.062, "step": 3000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733065954736233, "compression_loss": 0.0, "distillation_loss": 0.16211673617362976, "epoch": 2.85, "learning_rate": 3.222110256650028e-05, "loss": 0.1516, "step": 3001, "task_loss": 0.05707669258117676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733213568029809, "compression_loss": 0.0, "distillation_loss": 0.10503898561000824, "epoch": 2.85, "learning_rate": 3.2210900832838295e-05, "loss": 0.1082, "step": 3002, "task_loss": 0.13650718331336975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733361126893704, "compression_loss": 0.0, "distillation_loss": 0.037988051772117615, "epoch": 2.85, "learning_rate": 3.220069778928146e-05, "loss": 0.0421, "step": 3003, "task_loss": 0.0787007063627243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733508631337956, "compression_loss": 0.0, "distillation_loss": 0.10283227264881134, "epoch": 2.85, "learning_rate": 3.2190493437683185e-05, "loss": 0.1037, "step": 3004, "task_loss": 0.11105208098888397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733656081372603, "compression_loss": 0.0, "distillation_loss": 0.1534404307603836, "epoch": 2.85, "learning_rate": 3.2180287779897155e-05, "loss": 0.1409, "step": 3005, "task_loss": 0.028170330449938774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733803477007679, "compression_loss": 0.0, "distillation_loss": 0.03912707045674324, "epoch": 2.85, "learning_rate": 3.217008081777726e-05, "loss": 0.0443, "step": 3006, "task_loss": 0.09075871109962463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7733950818253224, "compression_loss": 0.0, "distillation_loss": 0.08084475994110107, "epoch": 2.86, "learning_rate": 3.2159872553177655e-05, "loss": 0.0756, "step": 3007, "task_loss": 0.028212103992700577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734098105119271, "compression_loss": 0.0, "distillation_loss": 0.09292879700660706, "epoch": 2.86, "learning_rate": 3.2149662987952725e-05, "loss": 0.0965, "step": 3008, "task_loss": 0.12906073033809662 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734245337615862, "compression_loss": 0.0, "distillation_loss": 0.04876542091369629, "epoch": 2.86, "learning_rate": 3.213945212395707e-05, "loss": 0.0478, "step": 3009, "task_loss": 0.039374131709337234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734392515753028, "compression_loss": 0.0, "distillation_loss": 0.061346620321273804, "epoch": 2.86, "learning_rate": 3.212923996304556e-05, "loss": 0.0648, "step": 3010, "task_loss": 0.09587064385414124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734539639540811, "compression_loss": 0.0, "distillation_loss": 0.18387816846370697, "epoch": 2.86, "learning_rate": 3.211902650707327e-05, "loss": 0.1763, "step": 3011, "task_loss": 0.10823600739240646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734686708989243, "compression_loss": 0.0, "distillation_loss": 0.08546897023916245, "epoch": 2.86, "learning_rate": 3.210881175789553e-05, "loss": 0.0837, "step": 3012, "task_loss": 0.06747668981552124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734833724108365, "compression_loss": 0.0, "distillation_loss": 0.26206955313682556, "epoch": 2.86, "learning_rate": 3.209859571736791e-05, "loss": 0.2582, "step": 3013, "task_loss": 0.2230200469493866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7734980684908211, "compression_loss": 0.0, "distillation_loss": 0.02551981247961521, "epoch": 2.86, "learning_rate": 3.208837838734618e-05, "loss": 0.0314, "step": 3014, "task_loss": 0.08437542617321014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.773512759139882, "compression_loss": 0.0, "distillation_loss": 0.11942901462316513, "epoch": 2.86, "learning_rate": 3.207815976968638e-05, "loss": 0.1173, "step": 3015, "task_loss": 0.0981997698545456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7735274443590228, "compression_loss": 0.0, "distillation_loss": 0.03865540400147438, "epoch": 2.86, "learning_rate": 3.2067939866244764e-05, "loss": 0.0452, "step": 3016, "task_loss": 0.10382010787725449 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7735421241492471, "compression_loss": 0.0, "distillation_loss": 0.1100415512919426, "epoch": 2.87, "learning_rate": 3.205771867887784e-05, "loss": 0.113, "step": 3017, "task_loss": 0.13981683552265167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7735567985115586, "compression_loss": 0.0, "distillation_loss": 0.19163449108600616, "epoch": 2.87, "learning_rate": 3.204749620944232e-05, "loss": 0.1836, "step": 3018, "task_loss": 0.11117150634527206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.773571467446961, "compression_loss": 0.0, "distillation_loss": 0.042591970413923264, "epoch": 2.87, "learning_rate": 3.203727245979518e-05, "loss": 0.0501, "step": 3019, "task_loss": 0.11770500242710114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7735861309564581, "compression_loss": 0.0, "distillation_loss": 0.17520135641098022, "epoch": 2.87, "learning_rate": 3.20270474317936e-05, "loss": 0.173, "step": 3020, "task_loss": 0.15289539098739624 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736007890410534, "compression_loss": 0.0, "distillation_loss": 0.058556366711854935, "epoch": 2.87, "learning_rate": 3.201682112729502e-05, "loss": 0.0701, "step": 3021, "task_loss": 0.17429102957248688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736154417017508, "compression_loss": 0.0, "distillation_loss": 0.12820349633693695, "epoch": 2.87, "learning_rate": 3.2006593548157074e-05, "loss": 0.1274, "step": 3022, "task_loss": 0.11984600871801376 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736300889395538, "compression_loss": 0.0, "distillation_loss": 0.12782520055770874, "epoch": 2.87, "learning_rate": 3.1996364696237676e-05, "loss": 0.1207, "step": 3023, "task_loss": 0.05618831515312195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736447307554661, "compression_loss": 0.0, "distillation_loss": 0.073527991771698, "epoch": 2.87, "learning_rate": 3.198613457339493e-05, "loss": 0.0798, "step": 3024, "task_loss": 0.13668204843997955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736593671504914, "compression_loss": 0.0, "distillation_loss": 0.017919428646564484, "epoch": 2.87, "learning_rate": 3.19759031814872e-05, "loss": 0.0165, "step": 3025, "task_loss": 0.0033756643533706665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736739981256334, "compression_loss": 0.0, "distillation_loss": 0.03155189007520676, "epoch": 2.87, "learning_rate": 3.196567052237306e-05, "loss": 0.0288, "step": 3026, "task_loss": 0.003785140812397003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7736886236818958, "compression_loss": 0.0, "distillation_loss": 0.10638611018657684, "epoch": 2.87, "learning_rate": 3.195543659791132e-05, "loss": 0.1069, "step": 3027, "task_loss": 0.11161436140537262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737032438202823, "compression_loss": 0.0, "distillation_loss": 0.19940927624702454, "epoch": 2.88, "learning_rate": 3.194520140996102e-05, "loss": 0.1983, "step": 3028, "task_loss": 0.1886705756187439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737178585417965, "compression_loss": 0.0, "distillation_loss": 0.0578513965010643, "epoch": 2.88, "learning_rate": 3.193496496038144e-05, "loss": 0.0666, "step": 3029, "task_loss": 0.1448466032743454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737324678474422, "compression_loss": 0.0, "distillation_loss": 0.18462207913398743, "epoch": 2.88, "learning_rate": 3.1924727251032075e-05, "loss": 0.1751, "step": 3030, "task_loss": 0.08917995542287827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737470717382229, "compression_loss": 0.0, "distillation_loss": 0.0979708880186081, "epoch": 2.88, "learning_rate": 3.191448828377267e-05, "loss": 0.1038, "step": 3031, "task_loss": 0.15621528029441833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737616702151424, "compression_loss": 0.0, "distillation_loss": 0.06346909701824188, "epoch": 2.88, "learning_rate": 3.1904248060463146e-05, "loss": 0.0632, "step": 3032, "task_loss": 0.06106296554207802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737762632792045, "compression_loss": 0.0, "distillation_loss": 0.08692383021116257, "epoch": 2.88, "learning_rate": 3.189400658296372e-05, "loss": 0.0864, "step": 3033, "task_loss": 0.08201731741428375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7737908509314126, "compression_loss": 0.0, "distillation_loss": 0.06603842228651047, "epoch": 2.88, "learning_rate": 3.188376385313479e-05, "loss": 0.0611, "step": 3034, "task_loss": 0.016200372949242592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7738054331727706, "compression_loss": 0.0, "distillation_loss": 0.1842041015625, "epoch": 2.88, "learning_rate": 3.187351987283701e-05, "loss": 0.1758, "step": 3035, "task_loss": 0.10015082359313965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7738200100042821, "compression_loss": 0.0, "distillation_loss": 0.1197400689125061, "epoch": 2.88, "learning_rate": 3.1863274643931244e-05, "loss": 0.1273, "step": 3036, "task_loss": 0.19555895030498505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7738345814269508, "compression_loss": 0.0, "distillation_loss": 0.24975571036338806, "epoch": 2.88, "learning_rate": 3.185302816827858e-05, "loss": 0.242, "step": 3037, "task_loss": 0.17197510600090027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7738491474417805, "compression_loss": 0.0, "distillation_loss": 0.0718345120549202, "epoch": 2.89, "learning_rate": 3.184278044774035e-05, "loss": 0.0706, "step": 3038, "task_loss": 0.0589919239282608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7738637080497746, "compression_loss": 0.0, "distillation_loss": 0.17170128226280212, "epoch": 2.89, "learning_rate": 3.183253148417808e-05, "loss": 0.1797, "step": 3039, "task_loss": 0.25216758251190186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.773878263251937, "compression_loss": 0.0, "distillation_loss": 0.17050856351852417, "epoch": 2.89, "learning_rate": 3.182228127945358e-05, "loss": 0.1782, "step": 3040, "task_loss": 0.2472606599330902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7738928130492714, "compression_loss": 0.0, "distillation_loss": 0.09137849509716034, "epoch": 2.89, "learning_rate": 3.1812029835428825e-05, "loss": 0.0876, "step": 3041, "task_loss": 0.053437259048223495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7739073574427813, "compression_loss": 0.0, "distillation_loss": 0.08643493056297302, "epoch": 2.89, "learning_rate": 3.1801777153966034e-05, "loss": 0.1166, "step": 3042, "task_loss": 0.3879551291465759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7739218964334705, "compression_loss": 0.0, "distillation_loss": 0.06834743916988373, "epoch": 2.89, "learning_rate": 3.179152323692767e-05, "loss": 0.0775, "step": 3043, "task_loss": 0.16033974289894104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7739364300223428, "compression_loss": 0.0, "distillation_loss": 0.07843412458896637, "epoch": 2.89, "learning_rate": 3.1781268086176406e-05, "loss": 0.0788, "step": 3044, "task_loss": 0.08182865381240845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7739509582104016, "compression_loss": 0.0, "distillation_loss": 0.09358078241348267, "epoch": 2.89, "learning_rate": 3.177101170357513e-05, "loss": 0.0883, "step": 3045, "task_loss": 0.04088345542550087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7739654809986508, "compression_loss": 0.0, "distillation_loss": 0.030289195477962494, "epoch": 2.89, "learning_rate": 3.1760754090986975e-05, "loss": 0.028, "step": 3046, "task_loss": 0.007276715710759163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7739799983880941, "compression_loss": 0.0, "distillation_loss": 0.06697987020015717, "epoch": 2.89, "learning_rate": 3.175049525027527e-05, "loss": 0.0772, "step": 3047, "task_loss": 0.16944551467895508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.773994510379735, "compression_loss": 0.0, "distillation_loss": 0.09630005806684494, "epoch": 2.89, "learning_rate": 3.17402351833036e-05, "loss": 0.0926, "step": 3048, "task_loss": 0.059294380247592926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740090169745774, "compression_loss": 0.0, "distillation_loss": 0.04504215717315674, "epoch": 2.9, "learning_rate": 3.1729973891935745e-05, "loss": 0.0579, "step": 3049, "task_loss": 0.17366653680801392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740235181736247, "compression_loss": 0.0, "distillation_loss": 0.024151597172021866, "epoch": 2.9, "learning_rate": 3.1719711378035714e-05, "loss": 0.0223, "step": 3050, "task_loss": 0.005144516006112099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740380139778809, "compression_loss": 0.0, "distillation_loss": 0.07286585122346878, "epoch": 2.9, "learning_rate": 3.1709447643467755e-05, "loss": 0.0733, "step": 3051, "task_loss": 0.07735887914896011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740525043883495, "compression_loss": 0.0, "distillation_loss": 0.12737995386123657, "epoch": 2.9, "learning_rate": 3.1699182690096316e-05, "loss": 0.1227, "step": 3052, "task_loss": 0.08107961714267731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740669894060341, "compression_loss": 0.0, "distillation_loss": 0.13144919276237488, "epoch": 2.9, "learning_rate": 3.168891651978609e-05, "loss": 0.1325, "step": 3053, "task_loss": 0.14158479869365692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740814690319386, "compression_loss": 0.0, "distillation_loss": 0.04931913688778877, "epoch": 2.9, "learning_rate": 3.167864913440195e-05, "loss": 0.0455, "step": 3054, "task_loss": 0.011026356369256973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7740959432670665, "compression_loss": 0.0, "distillation_loss": 0.032145917415618896, "epoch": 2.9, "learning_rate": 3.1668380535809036e-05, "loss": 0.0341, "step": 3055, "task_loss": 0.05174173414707184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7741104121124217, "compression_loss": 0.0, "distillation_loss": 0.04694236442446709, "epoch": 2.9, "learning_rate": 3.165811072587267e-05, "loss": 0.0486, "step": 3056, "task_loss": 0.06309117376804352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7741248755690077, "compression_loss": 0.0, "distillation_loss": 0.13567635416984558, "epoch": 2.9, "learning_rate": 3.164783970645845e-05, "loss": 0.1325, "step": 3057, "task_loss": 0.10343907028436661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7741393336378282, "compression_loss": 0.0, "distillation_loss": 0.11043036729097366, "epoch": 2.9, "learning_rate": 3.1637567479432113e-05, "loss": 0.1153, "step": 3058, "task_loss": 0.15953701734542847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7741537863198868, "compression_loss": 0.0, "distillation_loss": 0.06733449548482895, "epoch": 2.91, "learning_rate": 3.1627294046659675e-05, "loss": 0.0654, "step": 3059, "task_loss": 0.04814068600535393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7741682336161875, "compression_loss": 0.0, "distillation_loss": 0.030272886157035828, "epoch": 2.91, "learning_rate": 3.1617019410007366e-05, "loss": 0.0279, "step": 3060, "task_loss": 0.006075674667954445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7741826755277337, "compression_loss": 0.0, "distillation_loss": 0.16811983287334442, "epoch": 2.91, "learning_rate": 3.16067435713416e-05, "loss": 0.1535, "step": 3061, "task_loss": 0.021725336089730263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.774197112055529, "compression_loss": 0.0, "distillation_loss": 0.024632154032588005, "epoch": 2.91, "learning_rate": 3.159646653252906e-05, "loss": 0.0229, "step": 3062, "task_loss": 0.007006222382187843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7742115432005774, "compression_loss": 0.0, "distillation_loss": 0.19798026978969574, "epoch": 2.91, "learning_rate": 3.1586188295436594e-05, "loss": 0.1975, "step": 3063, "task_loss": 0.19281329214572906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7742259689638823, "compression_loss": 0.0, "distillation_loss": 0.08180099725723267, "epoch": 2.91, "learning_rate": 3.1575908861931314e-05, "loss": 0.0904, "step": 3064, "task_loss": 0.16796118021011353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7742403893464476, "compression_loss": 0.0, "distillation_loss": 0.0645010769367218, "epoch": 2.91, "learning_rate": 3.156562823388051e-05, "loss": 0.0614, "step": 3065, "task_loss": 0.03324957937002182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7742548043492768, "compression_loss": 0.0, "distillation_loss": 0.10893145948648453, "epoch": 2.91, "learning_rate": 3.155534641315172e-05, "loss": 0.1009, "step": 3066, "task_loss": 0.028400206938385963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7742692139733738, "compression_loss": 0.0, "distillation_loss": 0.032349854707717896, "epoch": 2.91, "learning_rate": 3.154506340161269e-05, "loss": 0.0305, "step": 3067, "task_loss": 0.013784002512693405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.774283618219742, "compression_loss": 0.0, "distillation_loss": 0.19412454962730408, "epoch": 2.91, "learning_rate": 3.1534779201131366e-05, "loss": 0.1937, "step": 3068, "task_loss": 0.1897927224636078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7742980170893853, "compression_loss": 0.0, "distillation_loss": 0.02269315905869007, "epoch": 2.91, "learning_rate": 3.152449381357593e-05, "loss": 0.021, "step": 3069, "task_loss": 0.005848288536071777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7743124105833072, "compression_loss": 0.0, "distillation_loss": 0.17387601733207703, "epoch": 2.92, "learning_rate": 3.151420724081478e-05, "loss": 0.1707, "step": 3070, "task_loss": 0.14171260595321655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7743267987025116, "compression_loss": 0.0, "distillation_loss": 0.0965137854218483, "epoch": 2.92, "learning_rate": 3.1503919484716495e-05, "loss": 0.0902, "step": 3071, "task_loss": 0.03351738303899765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7743411814480021, "compression_loss": 0.0, "distillation_loss": 0.05635223537683487, "epoch": 2.92, "learning_rate": 3.149363054714992e-05, "loss": 0.056, "step": 3072, "task_loss": 0.05248015746474266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7743555588207823, "compression_loss": 0.0, "distillation_loss": 0.12340125441551208, "epoch": 2.92, "learning_rate": 3.148334042998408e-05, "loss": 0.1179, "step": 3073, "task_loss": 0.06886960566043854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.774369930821856, "compression_loss": 0.0, "distillation_loss": 0.06106632202863693, "epoch": 2.92, "learning_rate": 3.1473049135088225e-05, "loss": 0.0575, "step": 3074, "task_loss": 0.025458548218011856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7743842974522268, "compression_loss": 0.0, "distillation_loss": 0.04669322818517685, "epoch": 2.92, "learning_rate": 3.146275666433183e-05, "loss": 0.0486, "step": 3075, "task_loss": 0.06542003899812698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7743986587128984, "compression_loss": 0.0, "distillation_loss": 0.15752683579921722, "epoch": 2.92, "learning_rate": 3.145246301958455e-05, "loss": 0.1552, "step": 3076, "task_loss": 0.13474304974079132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744130146048744, "compression_loss": 0.0, "distillation_loss": 0.040889352560043335, "epoch": 2.92, "learning_rate": 3.14421682027163e-05, "loss": 0.0456, "step": 3077, "task_loss": 0.08812226355075836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744273651291587, "compression_loss": 0.0, "distillation_loss": 0.07757270336151123, "epoch": 2.92, "learning_rate": 3.143187221559715e-05, "loss": 0.0816, "step": 3078, "task_loss": 0.11832978576421738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744417102867547, "compression_loss": 0.0, "distillation_loss": 0.04706088453531265, "epoch": 2.92, "learning_rate": 3.142157506009746e-05, "loss": 0.0517, "step": 3079, "task_loss": 0.0932609811425209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744560500786662, "compression_loss": 0.0, "distillation_loss": 0.17205679416656494, "epoch": 2.92, "learning_rate": 3.141127673808772e-05, "loss": 0.1766, "step": 3080, "task_loss": 0.21797949075698853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744703845058971, "compression_loss": 0.0, "distillation_loss": 0.11091163754463196, "epoch": 2.93, "learning_rate": 3.140097725143868e-05, "loss": 0.1031, "step": 3081, "task_loss": 0.03264191001653671 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744847135694508, "compression_loss": 0.0, "distillation_loss": 0.17644111812114716, "epoch": 2.93, "learning_rate": 3.13906766020213e-05, "loss": 0.1904, "step": 3082, "task_loss": 0.3157804012298584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7744990372703311, "compression_loss": 0.0, "distillation_loss": 0.037624701857566833, "epoch": 2.93, "learning_rate": 3.138037479170674e-05, "loss": 0.0394, "step": 3083, "task_loss": 0.05507116764783859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7745133556095416, "compression_loss": 0.0, "distillation_loss": 0.14321953058242798, "epoch": 2.93, "learning_rate": 3.137007182236637e-05, "loss": 0.1347, "step": 3084, "task_loss": 0.05840989202260971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7745276685880862, "compression_loss": 0.0, "distillation_loss": 0.03971727564930916, "epoch": 2.93, "learning_rate": 3.1359767695871767e-05, "loss": 0.0431, "step": 3085, "task_loss": 0.07382907718420029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7745419762069683, "compression_loss": 0.0, "distillation_loss": 0.3982169032096863, "epoch": 2.93, "learning_rate": 3.134946241409474e-05, "loss": 0.3973, "step": 3086, "task_loss": 0.3887830376625061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7745562784671917, "compression_loss": 0.0, "distillation_loss": 0.03360104188323021, "epoch": 2.93, "learning_rate": 3.133915597890729e-05, "loss": 0.0468, "step": 3087, "task_loss": 0.1660730242729187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.77457057536976, "compression_loss": 0.0, "distillation_loss": 0.030526025220751762, "epoch": 2.93, "learning_rate": 3.132884839218162e-05, "loss": 0.0331, "step": 3088, "task_loss": 0.056509580463171005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7745848669156772, "compression_loss": 0.0, "distillation_loss": 0.02792483940720558, "epoch": 2.93, "learning_rate": 3.131853965579016e-05, "loss": 0.0285, "step": 3089, "task_loss": 0.03412621468305588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7745991531059465, "compression_loss": 0.0, "distillation_loss": 0.10251462459564209, "epoch": 2.93, "learning_rate": 3.130822977160554e-05, "loss": 0.0957, "step": 3090, "task_loss": 0.03391581028699875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.774613433941572, "compression_loss": 0.0, "distillation_loss": 0.04014907404780388, "epoch": 2.94, "learning_rate": 3.129791874150062e-05, "loss": 0.0426, "step": 3091, "task_loss": 0.0648120641708374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7746277094235572, "compression_loss": 0.0, "distillation_loss": 0.0542302206158638, "epoch": 2.94, "learning_rate": 3.1287606567348406e-05, "loss": 0.0595, "step": 3092, "task_loss": 0.10736650973558426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7746419795529058, "compression_loss": 0.0, "distillation_loss": 0.02264413796365261, "epoch": 2.94, "learning_rate": 3.1277293251022185e-05, "loss": 0.0265, "step": 3093, "task_loss": 0.0615130253136158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7746562443306214, "compression_loss": 0.0, "distillation_loss": 0.16324403882026672, "epoch": 2.94, "learning_rate": 3.126697879439541e-05, "loss": 0.1576, "step": 3094, "task_loss": 0.10708633810281754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7746705037577079, "compression_loss": 0.0, "distillation_loss": 0.025752229616045952, "epoch": 2.94, "learning_rate": 3.1256663199341764e-05, "loss": 0.0244, "step": 3095, "task_loss": 0.012307420372962952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7746847578351687, "compression_loss": 0.0, "distillation_loss": 0.02341257967054844, "epoch": 2.94, "learning_rate": 3.124634646773511e-05, "loss": 0.0219, "step": 3096, "task_loss": 0.008523575961589813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7746990065640077, "compression_loss": 0.0, "distillation_loss": 0.11951486021280289, "epoch": 2.94, "learning_rate": 3.1236028601449534e-05, "loss": 0.1255, "step": 3097, "task_loss": 0.17937280237674713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747132499452285, "compression_loss": 0.0, "distillation_loss": 0.1342199444770813, "epoch": 2.94, "learning_rate": 3.1225709602359335e-05, "loss": 0.1285, "step": 3098, "task_loss": 0.07676542550325394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747274879798347, "compression_loss": 0.0, "distillation_loss": 0.04866989701986313, "epoch": 2.94, "learning_rate": 3.1215389472338995e-05, "loss": 0.0541, "step": 3099, "task_loss": 0.10313687473535538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747417206688302, "compression_loss": 0.0, "distillation_loss": 0.09249399602413177, "epoch": 2.94, "learning_rate": 3.1205068213263234e-05, "loss": 0.1012, "step": 3100, "task_loss": 0.17944881319999695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747559480132186, "compression_loss": 0.0, "distillation_loss": 0.05482405051589012, "epoch": 2.94, "learning_rate": 3.119474582700695e-05, "loss": 0.0604, "step": 3101, "task_loss": 0.11096543818712234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747701700140034, "compression_loss": 0.0, "distillation_loss": 0.02817433886229992, "epoch": 2.95, "learning_rate": 3.118442231544524e-05, "loss": 0.0331, "step": 3102, "task_loss": 0.07782773673534393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747843866721884, "compression_loss": 0.0, "distillation_loss": 0.03213953226804733, "epoch": 2.95, "learning_rate": 3.117409768045344e-05, "loss": 0.0392, "step": 3103, "task_loss": 0.10243944823741913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7747985979887774, "compression_loss": 0.0, "distillation_loss": 0.2645414471626282, "epoch": 2.95, "learning_rate": 3.116377192390706e-05, "loss": 0.2525, "step": 3104, "task_loss": 0.14380747079849243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7748128039647739, "compression_loss": 0.0, "distillation_loss": 0.03608560562133789, "epoch": 2.95, "learning_rate": 3.115344504768183e-05, "loss": 0.044, "step": 3105, "task_loss": 0.115634024143219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7748270046011818, "compression_loss": 0.0, "distillation_loss": 0.0855831503868103, "epoch": 2.95, "learning_rate": 3.1143117053653665e-05, "loss": 0.0873, "step": 3106, "task_loss": 0.10302843898534775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7748411998990046, "compression_loss": 0.0, "distillation_loss": 0.1709257960319519, "epoch": 2.95, "learning_rate": 3.113278794369869e-05, "loss": 0.1625, "step": 3107, "task_loss": 0.08654191344976425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.774855389859246, "compression_loss": 0.0, "distillation_loss": 0.012697904370725155, "epoch": 2.95, "learning_rate": 3.112245771969327e-05, "loss": 0.0119, "step": 3108, "task_loss": 0.004779975861310959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7748695744829096, "compression_loss": 0.0, "distillation_loss": 0.12192719429731369, "epoch": 2.95, "learning_rate": 3.1112126383513914e-05, "loss": 0.1268, "step": 3109, "task_loss": 0.1710333675146103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7748837537709993, "compression_loss": 0.0, "distillation_loss": 0.09610234200954437, "epoch": 2.95, "learning_rate": 3.110179393703737e-05, "loss": 0.1014, "step": 3110, "task_loss": 0.1487446278333664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7748979277245187, "compression_loss": 0.0, "distillation_loss": 0.12404096126556396, "epoch": 2.95, "learning_rate": 3.109146038214055e-05, "loss": 0.1215, "step": 3111, "task_loss": 0.09883137792348862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749120963444714, "compression_loss": 0.0, "distillation_loss": 0.07528949528932571, "epoch": 2.96, "learning_rate": 3.108112572070063e-05, "loss": 0.0717, "step": 3112, "task_loss": 0.03988656401634216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749262596318612, "compression_loss": 0.0, "distillation_loss": 0.06444555521011353, "epoch": 2.96, "learning_rate": 3.1070789954594934e-05, "loss": 0.0607, "step": 3113, "task_loss": 0.027471771463751793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749404175876917, "compression_loss": 0.0, "distillation_loss": 0.06135058403015137, "epoch": 2.96, "learning_rate": 3.1060453085701e-05, "loss": 0.0668, "step": 3114, "task_loss": 0.11571139842271805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749545702129665, "compression_loss": 0.0, "distillation_loss": 0.15376602113246918, "epoch": 2.96, "learning_rate": 3.105011511589658e-05, "loss": 0.1476, "step": 3115, "task_loss": 0.09177757799625397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749687175086896, "compression_loss": 0.0, "distillation_loss": 0.04827346280217171, "epoch": 2.96, "learning_rate": 3.103977604705961e-05, "loss": 0.059, "step": 3116, "task_loss": 0.15540531277656555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749828594758643, "compression_loss": 0.0, "distillation_loss": 0.08922263234853745, "epoch": 2.96, "learning_rate": 3.102943588106824e-05, "loss": 0.0912, "step": 3117, "task_loss": 0.10851528495550156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7749969961154946, "compression_loss": 0.0, "distillation_loss": 0.03960554301738739, "epoch": 2.96, "learning_rate": 3.10190946198008e-05, "loss": 0.0461, "step": 3118, "task_loss": 0.10426989942789078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750111274285838, "compression_loss": 0.0, "distillation_loss": 0.05592392012476921, "epoch": 2.96, "learning_rate": 3.100875226513583e-05, "loss": 0.0597, "step": 3119, "task_loss": 0.09348025172948837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750252534161359, "compression_loss": 0.0, "distillation_loss": 0.05171472951769829, "epoch": 2.96, "learning_rate": 3.099840881895208e-05, "loss": 0.0476, "step": 3120, "task_loss": 0.010983137413859367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750393740791546, "compression_loss": 0.0, "distillation_loss": 0.09998922049999237, "epoch": 2.96, "learning_rate": 3.098806428312847e-05, "loss": 0.0984, "step": 3121, "task_loss": 0.08365054428577423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750534894186435, "compression_loss": 0.0, "distillation_loss": 0.020870694890618324, "epoch": 2.96, "learning_rate": 3.097771865954415e-05, "loss": 0.0196, "step": 3122, "task_loss": 0.008448204025626183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750675994356061, "compression_loss": 0.0, "distillation_loss": 0.1635720431804657, "epoch": 2.97, "learning_rate": 3.096737195007845e-05, "loss": 0.1693, "step": 3123, "task_loss": 0.22086140513420105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750817041310464, "compression_loss": 0.0, "distillation_loss": 0.23271842300891876, "epoch": 2.97, "learning_rate": 3.0957024156610884e-05, "loss": 0.2299, "step": 3124, "task_loss": 0.20500387251377106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7750958035059679, "compression_loss": 0.0, "distillation_loss": 0.0583171546459198, "epoch": 2.97, "learning_rate": 3.09466752810212e-05, "loss": 0.0556, "step": 3125, "task_loss": 0.03121706284582615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751098975613742, "compression_loss": 0.0, "distillation_loss": 0.06426501274108887, "epoch": 2.97, "learning_rate": 3.093632532518931e-05, "loss": 0.0598, "step": 3126, "task_loss": 0.019734475761651993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751239862982691, "compression_loss": 0.0, "distillation_loss": 0.15301446616649628, "epoch": 2.97, "learning_rate": 3.092597429099534e-05, "loss": 0.1451, "step": 3127, "task_loss": 0.07339885830879211 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751380697176563, "compression_loss": 0.0, "distillation_loss": 0.10157543420791626, "epoch": 2.97, "learning_rate": 3.0915622180319585e-05, "loss": 0.1026, "step": 3128, "task_loss": 0.11221370846033096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751521478205394, "compression_loss": 0.0, "distillation_loss": 0.10004980117082596, "epoch": 2.97, "learning_rate": 3.090526899504259e-05, "loss": 0.096, "step": 3129, "task_loss": 0.05929484963417053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751662206079223, "compression_loss": 0.0, "distillation_loss": 0.035803135484457016, "epoch": 2.97, "learning_rate": 3.0894914737045034e-05, "loss": 0.0331, "step": 3130, "task_loss": 0.009108470752835274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751802880808084, "compression_loss": 0.0, "distillation_loss": 0.12775352597236633, "epoch": 2.97, "learning_rate": 3.088455940820782e-05, "loss": 0.1286, "step": 3131, "task_loss": 0.1366499364376068 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7751943502402016, "compression_loss": 0.0, "distillation_loss": 0.19691312313079834, "epoch": 2.97, "learning_rate": 3.087420301041206e-05, "loss": 0.1927, "step": 3132, "task_loss": 0.15475985407829285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752084070871054, "compression_loss": 0.0, "distillation_loss": 0.19966569542884827, "epoch": 2.98, "learning_rate": 3.086384554553902e-05, "loss": 0.1948, "step": 3133, "task_loss": 0.1510712206363678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752224586225235, "compression_loss": 0.0, "distillation_loss": 0.13692443072795868, "epoch": 2.98, "learning_rate": 3.0853487015470206e-05, "loss": 0.1358, "step": 3134, "task_loss": 0.12599924206733704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752365048474598, "compression_loss": 0.0, "distillation_loss": 0.23906008899211884, "epoch": 2.98, "learning_rate": 3.084312742208728e-05, "loss": 0.2498, "step": 3135, "task_loss": 0.34651899337768555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752505457629176, "compression_loss": 0.0, "distillation_loss": 0.03585006296634674, "epoch": 2.98, "learning_rate": 3.083276676727212e-05, "loss": 0.034, "step": 3136, "task_loss": 0.016858315095305443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752645813699011, "compression_loss": 0.0, "distillation_loss": 0.10707780718803406, "epoch": 2.98, "learning_rate": 3.082240505290678e-05, "loss": 0.1068, "step": 3137, "task_loss": 0.10397090762853622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752786116694135, "compression_loss": 0.0, "distillation_loss": 0.09790733456611633, "epoch": 2.98, "learning_rate": 3.081204228087353e-05, "loss": 0.091, "step": 3138, "task_loss": 0.029314137995243073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7752926366624586, "compression_loss": 0.0, "distillation_loss": 0.298001766204834, "epoch": 2.98, "learning_rate": 3.08016784530548e-05, "loss": 0.2974, "step": 3139, "task_loss": 0.2921411693096161 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753066563500404, "compression_loss": 0.0, "distillation_loss": 0.13080675899982452, "epoch": 2.98, "learning_rate": 3.0791313571333244e-05, "loss": 0.1232, "step": 3140, "task_loss": 0.05510927364230156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753206707331621, "compression_loss": 0.0, "distillation_loss": 0.10794495791196823, "epoch": 2.98, "learning_rate": 3.078094763759168e-05, "loss": 0.1087, "step": 3141, "task_loss": 0.1155327633023262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753346798128278, "compression_loss": 0.0, "distillation_loss": 0.0587034747004509, "epoch": 2.98, "learning_rate": 3.0770580653713146e-05, "loss": 0.0565, "step": 3142, "task_loss": 0.03710935637354851 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753486835900409, "compression_loss": 0.0, "distillation_loss": 0.09703940898180008, "epoch": 2.98, "learning_rate": 3.076021262158084e-05, "loss": 0.1103, "step": 3143, "task_loss": 0.22997155785560608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753626820658053, "compression_loss": 0.0, "distillation_loss": 0.0734797865152359, "epoch": 2.99, "learning_rate": 3.074984354307817e-05, "loss": 0.0786, "step": 3144, "task_loss": 0.1250114142894745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753766752411244, "compression_loss": 0.0, "distillation_loss": 0.10003338009119034, "epoch": 2.99, "learning_rate": 3.073947342008873e-05, "loss": 0.1057, "step": 3145, "task_loss": 0.15664803981781006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7753906631170021, "compression_loss": 0.0, "distillation_loss": 0.11784384399652481, "epoch": 2.99, "learning_rate": 3.07291022544963e-05, "loss": 0.124, "step": 3146, "task_loss": 0.17987778782844543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7754046456944421, "compression_loss": 0.0, "distillation_loss": 0.14313513040542603, "epoch": 2.99, "learning_rate": 3.0718730048184855e-05, "loss": 0.1502, "step": 3147, "task_loss": 0.2141174077987671 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.775418622974448, "compression_loss": 0.0, "distillation_loss": 0.027550848200917244, "epoch": 2.99, "learning_rate": 3.0708356803038556e-05, "loss": 0.0306, "step": 3148, "task_loss": 0.05766863748431206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7754325949580235, "compression_loss": 0.0, "distillation_loss": 0.11986173689365387, "epoch": 2.99, "learning_rate": 3.069798252094175e-05, "loss": 0.1198, "step": 3149, "task_loss": 0.11939980834722519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7754465616461722, "compression_loss": 0.0, "distillation_loss": 0.05384514853358269, "epoch": 2.99, "learning_rate": 3.068760720377897e-05, "loss": 0.0502, "step": 3150, "task_loss": 0.017584798857569695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7754605230398979, "compression_loss": 0.0, "distillation_loss": 0.12095589935779572, "epoch": 2.99, "learning_rate": 3.067723085343496e-05, "loss": 0.1254, "step": 3151, "task_loss": 0.16499733924865723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7754744791402043, "compression_loss": 0.0, "distillation_loss": 0.23978760838508606, "epoch": 2.99, "learning_rate": 3.066685347179462e-05, "loss": 0.2298, "step": 3152, "task_loss": 0.1398588865995407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.775488429948095, "compression_loss": 0.0, "distillation_loss": 0.016089707612991333, "epoch": 2.99, "learning_rate": 3.065647506074306e-05, "loss": 0.0148, "step": 3153, "task_loss": 0.002878313884139061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7755023754645736, "compression_loss": 0.0, "distillation_loss": 0.13207751512527466, "epoch": 3.0, "learning_rate": 3.064609562216555e-05, "loss": 0.1289, "step": 3154, "task_loss": 0.10021229833364487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7755163156906439, "compression_loss": 0.0, "distillation_loss": 0.022115526720881462, "epoch": 3.0, "learning_rate": 3.063571515794759e-05, "loss": 0.0208, "step": 3155, "task_loss": 0.008827542886137962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7755302506273097, "compression_loss": 0.0, "distillation_loss": 0.07137614488601685, "epoch": 3.0, "learning_rate": 3.062533366997483e-05, "loss": 0.0964, "step": 3156, "task_loss": 0.3213070034980774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7755441802755745, "compression_loss": 0.0, "distillation_loss": 0.01968713104724884, "epoch": 3.0, "learning_rate": 3.061495116013311e-05, "loss": 0.0209, "step": 3157, "task_loss": 0.03145528584718704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.775558104636442, "compression_loss": 0.0, "distillation_loss": 0.02357085794210434, "epoch": 3.0, "learning_rate": 3.060456763030847e-05, "loss": 0.0219, "step": 3158, "task_loss": 0.006720980629324913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.5919843851205062, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.757812535077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7755720237109159, "compression_loss": 0.0, "distillation_loss": 0.11487483233213425, "epoch": 3.0, "learning_rate": 3.059418308238713e-05, "loss": 0.1098, "step": 3159, "task_loss": 0.06400777399539948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7755859375, "compression_loss": 0.0, "distillation_loss": 0.3807898461818695, "epoch": 3.0, "learning_rate": 3.0583797518255505e-05, "loss": 0.3608, "step": 3160, "task_loss": 0.18057574331760406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7755998460046979, "compression_loss": 0.0, "distillation_loss": 0.08554988354444504, "epoch": 3.0, "learning_rate": 3.057341093980015e-05, "loss": 0.081, "step": 3161, "task_loss": 0.04051545634865761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756137492260131, "compression_loss": 0.0, "distillation_loss": 0.5044205784797668, "epoch": 3.0, "learning_rate": 3.056302334890786e-05, "loss": 0.4752, "step": 3162, "task_loss": 0.2119530588388443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756276471649496, "compression_loss": 0.0, "distillation_loss": 0.7463757395744324, "epoch": 3.0, "learning_rate": 3.055263474746559e-05, "loss": 0.7219, "step": 3163, "task_loss": 0.5015002489089966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756415398225108, "compression_loss": 0.0, "distillation_loss": 0.15632954239845276, "epoch": 3.0, "learning_rate": 3.054224513736048e-05, "loss": 0.1498, "step": 3164, "task_loss": 0.09135162830352783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756554271997006, "compression_loss": 0.0, "distillation_loss": 0.4045957326889038, "epoch": 3.01, "learning_rate": 3.0531854520479844e-05, "loss": 0.3806, "step": 3165, "task_loss": 0.16499730944633484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756693092975225, "compression_loss": 0.0, "distillation_loss": 0.43263140320777893, "epoch": 3.01, "learning_rate": 3.05214628987112e-05, "loss": 0.4179, "step": 3166, "task_loss": 0.2850673496723175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756831861169803, "compression_loss": 0.0, "distillation_loss": 0.23246167600154877, "epoch": 3.01, "learning_rate": 3.0511070273942217e-05, "loss": 0.2326, "step": 3167, "task_loss": 0.23360560834407806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7756970576590777, "compression_loss": 0.0, "distillation_loss": 0.1095157265663147, "epoch": 3.01, "learning_rate": 3.0500676648060776e-05, "loss": 0.1015, "step": 3168, "task_loss": 0.029302000999450684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7757109239248183, "compression_loss": 0.0, "distillation_loss": 0.31011509895324707, "epoch": 3.01, "learning_rate": 3.049028202295494e-05, "loss": 0.3013, "step": 3169, "task_loss": 0.22210848331451416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7757247849152058, "compression_loss": 0.0, "distillation_loss": 0.24750226736068726, "epoch": 3.01, "learning_rate": 3.0479886400512937e-05, "loss": 0.2541, "step": 3170, "task_loss": 0.31305140256881714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.775738640631244, "compression_loss": 0.0, "distillation_loss": 0.3639996349811554, "epoch": 3.01, "learning_rate": 3.0469489782623163e-05, "loss": 0.3661, "step": 3171, "task_loss": 0.3845079243183136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7757524910739364, "compression_loss": 0.0, "distillation_loss": 0.20695006847381592, "epoch": 3.01, "learning_rate": 3.045909217117424e-05, "loss": 0.2035, "step": 3172, "task_loss": 0.1726771593093872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7757663362442868, "compression_loss": 0.0, "distillation_loss": 0.511163592338562, "epoch": 3.01, "learning_rate": 3.0448693568054924e-05, "loss": 0.5024, "step": 3173, "task_loss": 0.42367231845855713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7757801761432989, "compression_loss": 0.0, "distillation_loss": 0.4330875873565674, "epoch": 3.01, "learning_rate": 3.0438293975154186e-05, "loss": 0.4055, "step": 3174, "task_loss": 0.157136470079422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7757940107719762, "compression_loss": 0.0, "distillation_loss": 0.3330579698085785, "epoch": 3.02, "learning_rate": 3.042789339436116e-05, "loss": 0.3187, "step": 3175, "task_loss": 0.18938302993774414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7758078401313225, "compression_loss": 0.0, "distillation_loss": 0.26333099603652954, "epoch": 3.02, "learning_rate": 3.041749182756515e-05, "loss": 0.2615, "step": 3176, "task_loss": 0.24524812400341034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7758216642223416, "compression_loss": 0.0, "distillation_loss": 0.11996375769376755, "epoch": 3.02, "learning_rate": 3.0407089276655664e-05, "loss": 0.1205, "step": 3177, "task_loss": 0.12564218044281006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.775835483046037, "compression_loss": 0.0, "distillation_loss": 0.18018177151679993, "epoch": 3.02, "learning_rate": 3.039668574352237e-05, "loss": 0.1752, "step": 3178, "task_loss": 0.12995178997516632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7758492966034125, "compression_loss": 0.0, "distillation_loss": 0.23442718386650085, "epoch": 3.02, "learning_rate": 3.0386281230055113e-05, "loss": 0.2305, "step": 3179, "task_loss": 0.19505998492240906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7758631048954718, "compression_loss": 0.0, "distillation_loss": 0.22563649713993073, "epoch": 3.02, "learning_rate": 3.0375875738143938e-05, "loss": 0.2198, "step": 3180, "task_loss": 0.16684770584106445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7758769079232184, "compression_loss": 0.0, "distillation_loss": 0.05871054530143738, "epoch": 3.02, "learning_rate": 3.0365469269679042e-05, "loss": 0.0537, "step": 3181, "task_loss": 0.008890055119991302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7758907056876562, "compression_loss": 0.0, "distillation_loss": 0.07384837418794632, "epoch": 3.02, "learning_rate": 3.0355061826550813e-05, "loss": 0.078, "step": 3182, "task_loss": 0.1150825098156929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7759044981897888, "compression_loss": 0.0, "distillation_loss": 0.29694080352783203, "epoch": 3.02, "learning_rate": 3.0344653410649815e-05, "loss": 0.2839, "step": 3183, "task_loss": 0.1665906310081482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7759182854306198, "compression_loss": 0.0, "distillation_loss": 0.06391322612762451, "epoch": 3.02, "learning_rate": 3.033424402386678e-05, "loss": 0.0754, "step": 3184, "task_loss": 0.1787717342376709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.775932067411153, "compression_loss": 0.0, "distillation_loss": 0.09081655740737915, "epoch": 3.02, "learning_rate": 3.032383366809263e-05, "loss": 0.0906, "step": 3185, "task_loss": 0.088164322078228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.775945844132392, "compression_loss": 0.0, "distillation_loss": 0.2625811696052551, "epoch": 3.03, "learning_rate": 3.031342234521845e-05, "loss": 0.2886, "step": 3186, "task_loss": 0.5232532620429993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7759596155953404, "compression_loss": 0.0, "distillation_loss": 0.05291663110256195, "epoch": 3.03, "learning_rate": 3.030301005713552e-05, "loss": 0.0481, "step": 3187, "task_loss": 0.004770837724208832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7759733818010022, "compression_loss": 0.0, "distillation_loss": 0.15582989156246185, "epoch": 3.03, "learning_rate": 3.0292596805735274e-05, "loss": 0.1483, "step": 3188, "task_loss": 0.08084064722061157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7759871427503807, "compression_loss": 0.0, "distillation_loss": 0.20074017345905304, "epoch": 3.03, "learning_rate": 3.028218259290932e-05, "loss": 0.1874, "step": 3189, "task_loss": 0.06705089658498764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.77600089844448, "compression_loss": 0.0, "distillation_loss": 0.06447502970695496, "epoch": 3.03, "learning_rate": 3.0271767420549463e-05, "loss": 0.0681, "step": 3190, "task_loss": 0.1006789430975914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7760146488843033, "compression_loss": 0.0, "distillation_loss": 0.2723419964313507, "epoch": 3.03, "learning_rate": 3.0261351290547667e-05, "loss": 0.2704, "step": 3191, "task_loss": 0.25289347767829895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7760283940708547, "compression_loss": 0.0, "distillation_loss": 0.2833019196987152, "epoch": 3.03, "learning_rate": 3.025093420479607e-05, "loss": 0.2691, "step": 3192, "task_loss": 0.1415441334247589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7760421340051376, "compression_loss": 0.0, "distillation_loss": 0.12315277010202408, "epoch": 3.03, "learning_rate": 3.0240516165186976e-05, "loss": 0.1165, "step": 3193, "task_loss": 0.056251607835292816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7760558686881558, "compression_loss": 0.0, "distillation_loss": 0.39456677436828613, "epoch": 3.03, "learning_rate": 3.0230097173612896e-05, "loss": 0.3795, "step": 3194, "task_loss": 0.24376647174358368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776069598120913, "compression_loss": 0.0, "distillation_loss": 0.13045181334018707, "epoch": 3.03, "learning_rate": 3.021967723196647e-05, "loss": 0.1243, "step": 3195, "task_loss": 0.06902102380990982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7760833223044128, "compression_loss": 0.0, "distillation_loss": 0.13044685125350952, "epoch": 3.04, "learning_rate": 3.020925634214054e-05, "loss": 0.1433, "step": 3196, "task_loss": 0.25945261120796204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776097041239659, "compression_loss": 0.0, "distillation_loss": 0.07756803929805756, "epoch": 3.04, "learning_rate": 3.01988345060281e-05, "loss": 0.0819, "step": 3197, "task_loss": 0.12053569406270981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7761107549276551, "compression_loss": 0.0, "distillation_loss": 0.285247802734375, "epoch": 3.04, "learning_rate": 3.018841172552234e-05, "loss": 0.2711, "step": 3198, "task_loss": 0.14363518357276917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7761244633694051, "compression_loss": 0.0, "distillation_loss": 0.36870819330215454, "epoch": 3.04, "learning_rate": 3.01779880025166e-05, "loss": 0.3675, "step": 3199, "task_loss": 0.3562384843826294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7761381665659124, "compression_loss": 0.0, "distillation_loss": 0.09676843881607056, "epoch": 3.04, "learning_rate": 3.0167563338904402e-05, "loss": 0.0949, "step": 3200, "task_loss": 0.0785759687423706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7761518645181807, "compression_loss": 0.0, "distillation_loss": 0.0968627855181694, "epoch": 3.04, "learning_rate": 3.0157137736579445e-05, "loss": 0.0951, "step": 3201, "task_loss": 0.07877330482006073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7761655572272138, "compression_loss": 0.0, "distillation_loss": 0.2662609815597534, "epoch": 3.04, "learning_rate": 3.014671119743556e-05, "loss": 0.2659, "step": 3202, "task_loss": 0.26285213232040405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7761792446940153, "compression_loss": 0.0, "distillation_loss": 0.07620637863874435, "epoch": 3.04, "learning_rate": 3.013628372336682e-05, "loss": 0.0915, "step": 3203, "task_loss": 0.22959944605827332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776192926919589, "compression_loss": 0.0, "distillation_loss": 0.13465449213981628, "epoch": 3.04, "learning_rate": 3.0125855316267394e-05, "loss": 0.1472, "step": 3204, "task_loss": 0.2603115439414978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762066039049383, "compression_loss": 0.0, "distillation_loss": 0.14497964084148407, "epoch": 3.04, "learning_rate": 3.0115425978031663e-05, "loss": 0.1501, "step": 3205, "task_loss": 0.1961698830127716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762202756510672, "compression_loss": 0.0, "distillation_loss": 0.24775618314743042, "epoch": 3.04, "learning_rate": 3.0104995710554174e-05, "loss": 0.2393, "step": 3206, "task_loss": 0.16288542747497559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762339421589793, "compression_loss": 0.0, "distillation_loss": 0.13831278681755066, "epoch": 3.05, "learning_rate": 3.0094564515729623e-05, "loss": 0.1352, "step": 3207, "task_loss": 0.10688167065382004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762476034296781, "compression_loss": 0.0, "distillation_loss": 0.04619987681508064, "epoch": 3.05, "learning_rate": 3.0084132395452896e-05, "loss": 0.0428, "step": 3208, "task_loss": 0.011970948427915573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762612594641676, "compression_loss": 0.0, "distillation_loss": 0.048860594630241394, "epoch": 3.05, "learning_rate": 3.0073699351619033e-05, "loss": 0.0447, "step": 3209, "task_loss": 0.0075436122715473175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762749102634512, "compression_loss": 0.0, "distillation_loss": 0.09679286926984787, "epoch": 3.05, "learning_rate": 3.0063265386123247e-05, "loss": 0.0926, "step": 3210, "task_loss": 0.054950546473264694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7762885558285327, "compression_loss": 0.0, "distillation_loss": 0.47562968730926514, "epoch": 3.05, "learning_rate": 3.0052830500860912e-05, "loss": 0.4544, "step": 3211, "task_loss": 0.26370471715927124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763021961604157, "compression_loss": 0.0, "distillation_loss": 0.10135069489479065, "epoch": 3.05, "learning_rate": 3.0042394697727587e-05, "loss": 0.0972, "step": 3212, "task_loss": 0.059726666659116745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763158312601041, "compression_loss": 0.0, "distillation_loss": 0.17847372591495514, "epoch": 3.05, "learning_rate": 3.0031957978618986e-05, "loss": 0.1807, "step": 3213, "task_loss": 0.20080722868442535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763294611286014, "compression_loss": 0.0, "distillation_loss": 0.17386582493782043, "epoch": 3.05, "learning_rate": 3.002152034543098e-05, "loss": 0.1664, "step": 3214, "task_loss": 0.09940002113580704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763430857669111, "compression_loss": 0.0, "distillation_loss": 0.10909870266914368, "epoch": 3.05, "learning_rate": 3.0011081800059616e-05, "loss": 0.1128, "step": 3215, "task_loss": 0.14585895836353302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763567051760373, "compression_loss": 0.0, "distillation_loss": 0.045543644577264786, "epoch": 3.05, "learning_rate": 3.0000642344401113e-05, "loss": 0.0494, "step": 3216, "task_loss": 0.08457530289888382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763703193569834, "compression_loss": 0.0, "distillation_loss": 0.14089834690093994, "epoch": 3.06, "learning_rate": 2.999020198035184e-05, "loss": 0.1429, "step": 3217, "task_loss": 0.16091729700565338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763839283107532, "compression_loss": 0.0, "distillation_loss": 0.15442857146263123, "epoch": 3.06, "learning_rate": 2.997976070980836e-05, "loss": 0.1538, "step": 3218, "task_loss": 0.1483321487903595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7763975320383504, "compression_loss": 0.0, "distillation_loss": 0.1512613296508789, "epoch": 3.06, "learning_rate": 2.996931853466734e-05, "loss": 0.1643, "step": 3219, "task_loss": 0.28167420625686646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7764111305407785, "compression_loss": 0.0, "distillation_loss": 0.129611536860466, "epoch": 3.06, "learning_rate": 2.9958875456825692e-05, "loss": 0.1331, "step": 3220, "task_loss": 0.1641448736190796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7764247238190414, "compression_loss": 0.0, "distillation_loss": 0.06467771530151367, "epoch": 3.06, "learning_rate": 2.9948431478180434e-05, "loss": 0.0646, "step": 3221, "task_loss": 0.06406955420970917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7764383118741426, "compression_loss": 0.0, "distillation_loss": 0.040332306176424026, "epoch": 3.06, "learning_rate": 2.9937986600628758e-05, "loss": 0.0371, "step": 3222, "task_loss": 0.008311469107866287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776451894707086, "compression_loss": 0.0, "distillation_loss": 0.23755374550819397, "epoch": 3.06, "learning_rate": 2.992754082606804e-05, "loss": 0.2371, "step": 3223, "task_loss": 0.23277811706066132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776465472318875, "compression_loss": 0.0, "distillation_loss": 0.2364281862974167, "epoch": 3.06, "learning_rate": 2.9917094156395796e-05, "loss": 0.2324, "step": 3224, "task_loss": 0.19599126279354095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7764790447105135, "compression_loss": 0.0, "distillation_loss": 0.058149438351392746, "epoch": 3.06, "learning_rate": 2.990664659350973e-05, "loss": 0.0539, "step": 3225, "task_loss": 0.015191374346613884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7764926118830051, "compression_loss": 0.0, "distillation_loss": 0.20538832247257233, "epoch": 3.06, "learning_rate": 2.9896198139307668e-05, "loss": 0.1981, "step": 3226, "task_loss": 0.13257679343223572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7765061738373535, "compression_loss": 0.0, "distillation_loss": 0.033098138868808746, "epoch": 3.06, "learning_rate": 2.9885748795687642e-05, "loss": 0.0396, "step": 3227, "task_loss": 0.09805357456207275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7765197305745625, "compression_loss": 0.0, "distillation_loss": 0.13397681713104248, "epoch": 3.07, "learning_rate": 2.9875298564547805e-05, "loss": 0.1336, "step": 3228, "task_loss": 0.1301775723695755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7765332820956355, "compression_loss": 0.0, "distillation_loss": 0.1260242760181427, "epoch": 3.07, "learning_rate": 2.9864847447786503e-05, "loss": 0.1186, "step": 3229, "task_loss": 0.05128313973546028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7765468284015764, "compression_loss": 0.0, "distillation_loss": 0.14219728112220764, "epoch": 3.07, "learning_rate": 2.9854395447302246e-05, "loss": 0.1386, "step": 3230, "task_loss": 0.10616907477378845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7765603694933888, "compression_loss": 0.0, "distillation_loss": 0.06799346953630447, "epoch": 3.07, "learning_rate": 2.9843942564993672e-05, "loss": 0.0646, "step": 3231, "task_loss": 0.0343768373131752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7765739053720765, "compression_loss": 0.0, "distillation_loss": 0.03196606785058975, "epoch": 3.07, "learning_rate": 2.98334888027596e-05, "loss": 0.0436, "step": 3232, "task_loss": 0.14865921437740326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776587436038643, "compression_loss": 0.0, "distillation_loss": 0.12633199989795685, "epoch": 3.07, "learning_rate": 2.9823034162499007e-05, "loss": 0.1216, "step": 3233, "task_loss": 0.07861751317977905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766009614940921, "compression_loss": 0.0, "distillation_loss": 0.11836487054824829, "epoch": 3.07, "learning_rate": 2.981257864611104e-05, "loss": 0.1123, "step": 3234, "task_loss": 0.058093033730983734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766144817394275, "compression_loss": 0.0, "distillation_loss": 0.09729697555303574, "epoch": 3.07, "learning_rate": 2.980212225549498e-05, "loss": 0.0916, "step": 3235, "task_loss": 0.040597811341285706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766279967756528, "compression_loss": 0.0, "distillation_loss": 0.043897949159145355, "epoch": 3.07, "learning_rate": 2.9791664992550273e-05, "loss": 0.0408, "step": 3236, "task_loss": 0.012871745973825455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766415066037717, "compression_loss": 0.0, "distillation_loss": 0.27021345496177673, "epoch": 3.07, "learning_rate": 2.978120685917656e-05, "loss": 0.2581, "step": 3237, "task_loss": 0.1487370729446411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766550112247879, "compression_loss": 0.0, "distillation_loss": 0.24136856198310852, "epoch": 3.08, "learning_rate": 2.9770747857273584e-05, "loss": 0.2404, "step": 3238, "task_loss": 0.23204952478408813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766685106397051, "compression_loss": 0.0, "distillation_loss": 0.038442693650722504, "epoch": 3.08, "learning_rate": 2.9760287988741293e-05, "loss": 0.0354, "step": 3239, "task_loss": 0.008406125009059906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776682004849527, "compression_loss": 0.0, "distillation_loss": 0.2253015786409378, "epoch": 3.08, "learning_rate": 2.9749827255479755e-05, "loss": 0.2234, "step": 3240, "task_loss": 0.20639631152153015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7766954938552572, "compression_loss": 0.0, "distillation_loss": 0.19598889350891113, "epoch": 3.08, "learning_rate": 2.9739365659389223e-05, "loss": 0.1854, "step": 3241, "task_loss": 0.08999612182378769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7767089776578995, "compression_loss": 0.0, "distillation_loss": 0.029667746275663376, "epoch": 3.08, "learning_rate": 2.972890320237009e-05, "loss": 0.0271, "step": 3242, "task_loss": 0.0044384244829416275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7767224562584574, "compression_loss": 0.0, "distillation_loss": 0.03521028161048889, "epoch": 3.08, "learning_rate": 2.971843988632292e-05, "loss": 0.0321, "step": 3243, "task_loss": 0.00444982573390007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7767359296579347, "compression_loss": 0.0, "distillation_loss": 0.06014445796608925, "epoch": 3.08, "learning_rate": 2.970797571314842e-05, "loss": 0.0566, "step": 3244, "task_loss": 0.02440224587917328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7767493978573352, "compression_loss": 0.0, "distillation_loss": 0.27860748767852783, "epoch": 3.08, "learning_rate": 2.9697510684747454e-05, "loss": 0.2662, "step": 3245, "task_loss": 0.15431803464889526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7767628608576623, "compression_loss": 0.0, "distillation_loss": 0.36406850814819336, "epoch": 3.08, "learning_rate": 2.9687044803021057e-05, "loss": 0.3679, "step": 3246, "task_loss": 0.4021039605140686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.77677631865992, "compression_loss": 0.0, "distillation_loss": 0.4641655385494232, "epoch": 3.08, "learning_rate": 2.9676578069870392e-05, "loss": 0.4604, "step": 3247, "task_loss": 0.4263751804828644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7767897712651117, "compression_loss": 0.0, "distillation_loss": 0.10289011895656586, "epoch": 3.08, "learning_rate": 2.9666110487196798e-05, "loss": 0.1047, "step": 3248, "task_loss": 0.12141219526529312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7768032186742413, "compression_loss": 0.0, "distillation_loss": 0.09398964792490005, "epoch": 3.09, "learning_rate": 2.9655642056901762e-05, "loss": 0.0971, "step": 3249, "task_loss": 0.1248759999871254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7768166608883122, "compression_loss": 0.0, "distillation_loss": 0.20722612738609314, "epoch": 3.09, "learning_rate": 2.9645172780886927e-05, "loss": 0.2046, "step": 3250, "task_loss": 0.18093663454055786 }, { "epoch": 3.09, "eval_accuracy": 0.8795871559633027, "eval_loss": 0.5020281076431274, "eval_runtime": 17.7871, "eval_samples_per_second": 49.024, "eval_steps_per_second": 6.128, "step": 3250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7768300979083285, "compression_loss": 0.0, "distillation_loss": 0.0628579705953598, "epoch": 3.09, "learning_rate": 2.9634702661054085e-05, "loss": 0.0577, "step": 3251, "task_loss": 0.010829754173755646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7768435297352936, "compression_loss": 0.0, "distillation_loss": 0.04929596930742264, "epoch": 3.09, "learning_rate": 2.962423169930518e-05, "loss": 0.0538, "step": 3252, "task_loss": 0.09398236870765686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7768569563702111, "compression_loss": 0.0, "distillation_loss": 0.055220600217580795, "epoch": 3.09, "learning_rate": 2.961375989754232e-05, "loss": 0.0594, "step": 3253, "task_loss": 0.09694081544876099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776870377814085, "compression_loss": 0.0, "distillation_loss": 0.14030757546424866, "epoch": 3.09, "learning_rate": 2.9603287257667754e-05, "loss": 0.1347, "step": 3254, "task_loss": 0.08383115381002426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7768837940679186, "compression_loss": 0.0, "distillation_loss": 0.05990925058722496, "epoch": 3.09, "learning_rate": 2.9592813781583885e-05, "loss": 0.0712, "step": 3255, "task_loss": 0.17235514521598816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776897205132716, "compression_loss": 0.0, "distillation_loss": 0.14225684106349945, "epoch": 3.09, "learning_rate": 2.958233947119328e-05, "loss": 0.144, "step": 3256, "task_loss": 0.16001349687576294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7769106110094804, "compression_loss": 0.0, "distillation_loss": 0.03743474930524826, "epoch": 3.09, "learning_rate": 2.9571864328398636e-05, "loss": 0.0344, "step": 3257, "task_loss": 0.006805334240198135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776924011699216, "compression_loss": 0.0, "distillation_loss": 0.13843439519405365, "epoch": 3.09, "learning_rate": 2.956138835510282e-05, "loss": 0.1333, "step": 3258, "task_loss": 0.08740270882844925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.776937407202926, "compression_loss": 0.0, "distillation_loss": 0.044483255594968796, "epoch": 3.09, "learning_rate": 2.9550911553208838e-05, "loss": 0.0404, "step": 3259, "task_loss": 0.003829212859272957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7769507975216144, "compression_loss": 0.0, "distillation_loss": 0.3198070526123047, "epoch": 3.1, "learning_rate": 2.954043392461986e-05, "loss": 0.3065, "step": 3260, "task_loss": 0.18672311305999756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7769641826562849, "compression_loss": 0.0, "distillation_loss": 0.1478300392627716, "epoch": 3.1, "learning_rate": 2.952995547123919e-05, "loss": 0.1487, "step": 3261, "task_loss": 0.1560606062412262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7769775626079409, "compression_loss": 0.0, "distillation_loss": 0.05190206691622734, "epoch": 3.1, "learning_rate": 2.9519476194970286e-05, "loss": 0.0471, "step": 3262, "task_loss": 0.0037672966718673706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7769909373775864, "compression_loss": 0.0, "distillation_loss": 0.07992243766784668, "epoch": 3.1, "learning_rate": 2.9508996097716777e-05, "loss": 0.0892, "step": 3263, "task_loss": 0.17313553392887115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770043069662249, "compression_loss": 0.0, "distillation_loss": 0.1712629646062851, "epoch": 3.1, "learning_rate": 2.949851518138241e-05, "loss": 0.1902, "step": 3264, "task_loss": 0.3601612150669098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770176713748601, "compression_loss": 0.0, "distillation_loss": 0.020557358860969543, "epoch": 3.1, "learning_rate": 2.948803344787109e-05, "loss": 0.019, "step": 3265, "task_loss": 0.004615806043148041 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770310306044956, "compression_loss": 0.0, "distillation_loss": 0.058595433831214905, "epoch": 3.1, "learning_rate": 2.947755089908688e-05, "loss": 0.0613, "step": 3266, "task_loss": 0.0856269896030426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770443846561353, "compression_loss": 0.0, "distillation_loss": 0.13503427803516388, "epoch": 3.1, "learning_rate": 2.946706753693398e-05, "loss": 0.1376, "step": 3267, "task_loss": 0.16037359833717346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770577335307828, "compression_loss": 0.0, "distillation_loss": 0.06786477565765381, "epoch": 3.1, "learning_rate": 2.945658336331676e-05, "loss": 0.0684, "step": 3268, "task_loss": 0.07298760861158371 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770710772294416, "compression_loss": 0.0, "distillation_loss": 0.06129030883312225, "epoch": 3.1, "learning_rate": 2.9446098380139703e-05, "loss": 0.0586, "step": 3269, "task_loss": 0.03390103578567505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770844157531156, "compression_loss": 0.0, "distillation_loss": 0.07370973378419876, "epoch": 3.11, "learning_rate": 2.9435612589307458e-05, "loss": 0.0731, "step": 3270, "task_loss": 0.06739000231027603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7770977491028085, "compression_loss": 0.0, "distillation_loss": 0.0707666426897049, "epoch": 3.11, "learning_rate": 2.942512599272483e-05, "loss": 0.0739, "step": 3271, "task_loss": 0.1019490510225296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771110772795238, "compression_loss": 0.0, "distillation_loss": 0.08299912512302399, "epoch": 3.11, "learning_rate": 2.9414638592296752e-05, "loss": 0.0816, "step": 3272, "task_loss": 0.06940528750419617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771244002842652, "compression_loss": 0.0, "distillation_loss": 0.13758215308189392, "epoch": 3.11, "learning_rate": 2.9404150389928316e-05, "loss": 0.1317, "step": 3273, "task_loss": 0.07901225984096527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771377181180366, "compression_loss": 0.0, "distillation_loss": 0.14945004880428314, "epoch": 3.11, "learning_rate": 2.9393661387524745e-05, "loss": 0.1439, "step": 3274, "task_loss": 0.09377557784318924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771510307818414, "compression_loss": 0.0, "distillation_loss": 0.13088908791542053, "epoch": 3.11, "learning_rate": 2.9383171586991424e-05, "loss": 0.1261, "step": 3275, "task_loss": 0.0828273668885231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771643382766835, "compression_loss": 0.0, "distillation_loss": 0.1356673389673233, "epoch": 3.11, "learning_rate": 2.9372680990233875e-05, "loss": 0.1301, "step": 3276, "task_loss": 0.07979649305343628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771776406035665, "compression_loss": 0.0, "distillation_loss": 0.15914839506149292, "epoch": 3.11, "learning_rate": 2.9362189599157776e-05, "loss": 0.1522, "step": 3277, "task_loss": 0.08918256312608719 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7771909377634941, "compression_loss": 0.0, "distillation_loss": 0.031945958733558655, "epoch": 3.11, "learning_rate": 2.9351697415668917e-05, "loss": 0.0375, "step": 3278, "task_loss": 0.08777586370706558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772042297574699, "compression_loss": 0.0, "distillation_loss": 0.07947254180908203, "epoch": 3.11, "learning_rate": 2.9341204441673266e-05, "loss": 0.0822, "step": 3279, "task_loss": 0.106949083507061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772175165864978, "compression_loss": 0.0, "distillation_loss": 0.11136071383953094, "epoch": 3.11, "learning_rate": 2.9330710679076916e-05, "loss": 0.1059, "step": 3280, "task_loss": 0.057253483682870865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772307982515813, "compression_loss": 0.0, "distillation_loss": 0.04170932248234749, "epoch": 3.12, "learning_rate": 2.9320216129786116e-05, "loss": 0.0457, "step": 3281, "task_loss": 0.08145752549171448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777244074753724, "compression_loss": 0.0, "distillation_loss": 0.014708688482642174, "epoch": 3.12, "learning_rate": 2.9309720795707257e-05, "loss": 0.0136, "step": 3282, "task_loss": 0.0032885856926441193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772573460939297, "compression_loss": 0.0, "distillation_loss": 0.08308391273021698, "epoch": 3.12, "learning_rate": 2.9299224678746855e-05, "loss": 0.0762, "step": 3283, "task_loss": 0.014657005667686462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772706122732022, "compression_loss": 0.0, "distillation_loss": 0.1468481719493866, "epoch": 3.12, "learning_rate": 2.928872778081158e-05, "loss": 0.1414, "step": 3284, "task_loss": 0.09249945729970932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772838732925449, "compression_loss": 0.0, "distillation_loss": 0.029921449720859528, "epoch": 3.12, "learning_rate": 2.9278230103808257e-05, "loss": 0.0345, "step": 3285, "task_loss": 0.07549962401390076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7772971291529618, "compression_loss": 0.0, "distillation_loss": 0.0418395921587944, "epoch": 3.12, "learning_rate": 2.9267731649643827e-05, "loss": 0.048, "step": 3286, "task_loss": 0.10373726487159729 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7773103798554563, "compression_loss": 0.0, "distillation_loss": 0.05867641791701317, "epoch": 3.12, "learning_rate": 2.9257232420225394e-05, "loss": 0.0721, "step": 3287, "task_loss": 0.19298899173736572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7773236254010323, "compression_loss": 0.0, "distillation_loss": 0.05694715678691864, "epoch": 3.12, "learning_rate": 2.9246732417460178e-05, "loss": 0.0553, "step": 3288, "task_loss": 0.0401817262172699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7773368657906933, "compression_loss": 0.0, "distillation_loss": 0.056060872972011566, "epoch": 3.12, "learning_rate": 2.9236231643255578e-05, "loss": 0.0606, "step": 3289, "task_loss": 0.10119974613189697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7773501010254431, "compression_loss": 0.0, "distillation_loss": 0.08849294483661652, "epoch": 3.12, "learning_rate": 2.922573009951909e-05, "loss": 0.0865, "step": 3290, "task_loss": 0.06868458539247513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7773633311062854, "compression_loss": 0.0, "distillation_loss": 0.0786486566066742, "epoch": 3.13, "learning_rate": 2.9215227788158382e-05, "loss": 0.0987, "step": 3291, "task_loss": 0.27886343002319336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7773765560342237, "compression_loss": 0.0, "distillation_loss": 0.08726784586906433, "epoch": 3.13, "learning_rate": 2.920472471108125e-05, "loss": 0.0813, "step": 3292, "task_loss": 0.0275074802339077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777389775810262, "compression_loss": 0.0, "distillation_loss": 0.10106363892555237, "epoch": 3.13, "learning_rate": 2.919422087019561e-05, "loss": 0.0965, "step": 3293, "task_loss": 0.055741891264915466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774029904354037, "compression_loss": 0.0, "distillation_loss": 0.14217272400856018, "epoch": 3.13, "learning_rate": 2.9183716267409562e-05, "loss": 0.1446, "step": 3294, "task_loss": 0.16670069098472595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774161999106526, "compression_loss": 0.0, "distillation_loss": 0.10469906777143478, "epoch": 3.13, "learning_rate": 2.9173210904631297e-05, "loss": 0.1019, "step": 3295, "task_loss": 0.07687127590179443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774294042370123, "compression_loss": 0.0, "distillation_loss": 0.17787286639213562, "epoch": 3.13, "learning_rate": 2.916270478376918e-05, "loss": 0.1797, "step": 3296, "task_loss": 0.19574972987174988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774426034154867, "compression_loss": 0.0, "distillation_loss": 0.22817805409431458, "epoch": 3.13, "learning_rate": 2.9152197906731687e-05, "loss": 0.2223, "step": 3297, "task_loss": 0.16891571879386902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774557974470792, "compression_loss": 0.0, "distillation_loss": 0.2010933756828308, "epoch": 3.13, "learning_rate": 2.9141690275427445e-05, "loss": 0.2089, "step": 3298, "task_loss": 0.27871859073638916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774689863327936, "compression_loss": 0.0, "distillation_loss": 0.07105930149555206, "epoch": 3.13, "learning_rate": 2.9131181891765226e-05, "loss": 0.067, "step": 3299, "task_loss": 0.030665744096040726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7774821700736336, "compression_loss": 0.0, "distillation_loss": 0.03237374126911163, "epoch": 3.13, "learning_rate": 2.9120672757653916e-05, "loss": 0.0299, "step": 3300, "task_loss": 0.00755789689719677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777495348670603, "compression_loss": 0.0, "distillation_loss": 0.04193927347660065, "epoch": 3.13, "learning_rate": 2.9110162875002552e-05, "loss": 0.0383, "step": 3301, "task_loss": 0.005132727324962616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7775085221247052, "compression_loss": 0.0, "distillation_loss": 0.02390095964074135, "epoch": 3.14, "learning_rate": 2.909965224572031e-05, "loss": 0.0227, "step": 3302, "task_loss": 0.011394314467906952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7775216904369441, "compression_loss": 0.0, "distillation_loss": 0.11238003522157669, "epoch": 3.14, "learning_rate": 2.9089140871716492e-05, "loss": 0.1184, "step": 3303, "task_loss": 0.1725841611623764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7775348536083233, "compression_loss": 0.0, "distillation_loss": 0.09369160979986191, "epoch": 3.14, "learning_rate": 2.9078628754900543e-05, "loss": 0.0978, "step": 3304, "task_loss": 0.1350669115781784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7775480116398465, "compression_loss": 0.0, "distillation_loss": 0.04276084154844284, "epoch": 3.14, "learning_rate": 2.9068115897182036e-05, "loss": 0.0476, "step": 3305, "task_loss": 0.09119967371225357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7775611645325174, "compression_loss": 0.0, "distillation_loss": 0.14125396311283112, "epoch": 3.14, "learning_rate": 2.905760230047068e-05, "loss": 0.1457, "step": 3306, "task_loss": 0.18620248138904572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7775743122873398, "compression_loss": 0.0, "distillation_loss": 0.05440014600753784, "epoch": 3.14, "learning_rate": 2.9047087966676327e-05, "loss": 0.0512, "step": 3307, "task_loss": 0.021976150572299957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777587454905317, "compression_loss": 0.0, "distillation_loss": 0.10159926116466522, "epoch": 3.14, "learning_rate": 2.903657289770896e-05, "loss": 0.0933, "step": 3308, "task_loss": 0.018225595355033875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777600592387453, "compression_loss": 0.0, "distillation_loss": 0.05529090762138367, "epoch": 3.14, "learning_rate": 2.902605709547868e-05, "loss": 0.0516, "step": 3309, "task_loss": 0.01865418255329132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7776137247347515, "compression_loss": 0.0, "distillation_loss": 0.15098199248313904, "epoch": 3.14, "learning_rate": 2.9015540561895738e-05, "loss": 0.1434, "step": 3310, "task_loss": 0.07475800067186356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777626851948216, "compression_loss": 0.0, "distillation_loss": 0.0264451764523983, "epoch": 3.14, "learning_rate": 2.9005023298870514e-05, "loss": 0.0323, "step": 3311, "task_loss": 0.08548250794410706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7776399740288504, "compression_loss": 0.0, "distillation_loss": 0.24020592868328094, "epoch": 3.15, "learning_rate": 2.8994505308313523e-05, "loss": 0.2399, "step": 3312, "task_loss": 0.2373048961162567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7776530909776581, "compression_loss": 0.0, "distillation_loss": 0.16168448328971863, "epoch": 3.15, "learning_rate": 2.8983986592135404e-05, "loss": 0.1736, "step": 3313, "task_loss": 0.2803611159324646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7776662027956431, "compression_loss": 0.0, "distillation_loss": 0.2303021252155304, "epoch": 3.15, "learning_rate": 2.897346715224693e-05, "loss": 0.23, "step": 3314, "task_loss": 0.22724005579948425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7776793094838088, "compression_loss": 0.0, "distillation_loss": 0.12179985642433167, "epoch": 3.15, "learning_rate": 2.8962946990559013e-05, "loss": 0.1226, "step": 3315, "task_loss": 0.1297818422317505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7776924110431591, "compression_loss": 0.0, "distillation_loss": 0.09324241429567337, "epoch": 3.15, "learning_rate": 2.8952426108982693e-05, "loss": 0.0956, "step": 3316, "task_loss": 0.11695347726345062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777055074746976, "compression_loss": 0.0, "distillation_loss": 0.06409186124801636, "epoch": 3.15, "learning_rate": 2.8941904509429134e-05, "loss": 0.0587, "step": 3317, "task_loss": 0.010216565802693367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777185987794278, "compression_loss": 0.0, "distillation_loss": 0.1214500218629837, "epoch": 3.15, "learning_rate": 2.8931382193809635e-05, "loss": 0.1167, "step": 3318, "task_loss": 0.0738031342625618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777316849583537, "compression_loss": 0.0, "distillation_loss": 0.026341602206230164, "epoch": 3.15, "learning_rate": 2.8920859164035625e-05, "loss": 0.024, "step": 3319, "task_loss": 0.00342458114027977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777447660124788, "compression_loss": 0.0, "distillation_loss": 0.09703463315963745, "epoch": 3.15, "learning_rate": 2.8910335422018664e-05, "loss": 0.0881, "step": 3320, "task_loss": 0.00742473267018795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777578419428068, "compression_loss": 0.0, "distillation_loss": 0.16258884966373444, "epoch": 3.15, "learning_rate": 2.8899810969670448e-05, "loss": 0.1614, "step": 3321, "task_loss": 0.15071289241313934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777709127503414, "compression_loss": 0.0, "distillation_loss": 0.04823547974228859, "epoch": 3.15, "learning_rate": 2.8889285808902784e-05, "loss": 0.0598, "step": 3322, "task_loss": 0.16371826827526093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777839784360863, "compression_loss": 0.0, "distillation_loss": 0.023103870451450348, "epoch": 3.16, "learning_rate": 2.887875994162762e-05, "loss": 0.0309, "step": 3323, "task_loss": 0.10059693455696106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7777970390010451, "compression_loss": 0.0, "distillation_loss": 0.1937963366508484, "epoch": 3.16, "learning_rate": 2.886823336975703e-05, "loss": 0.1993, "step": 3324, "task_loss": 0.248738631606102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778100944462216, "compression_loss": 0.0, "distillation_loss": 0.015010814182460308, "epoch": 3.16, "learning_rate": 2.885770609520323e-05, "loss": 0.014, "step": 3325, "task_loss": 0.004603438079357147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778231447726194, "compression_loss": 0.0, "distillation_loss": 0.05482349917292595, "epoch": 3.16, "learning_rate": 2.8847178119878527e-05, "loss": 0.0568, "step": 3326, "task_loss": 0.07430904358625412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778361899812423, "compression_loss": 0.0, "distillation_loss": 0.12944038212299347, "epoch": 3.16, "learning_rate": 2.883664944569539e-05, "loss": 0.1358, "step": 3327, "task_loss": 0.19279280304908752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778492300730937, "compression_loss": 0.0, "distillation_loss": 0.024749279022216797, "epoch": 3.16, "learning_rate": 2.8826120074566414e-05, "loss": 0.0414, "step": 3328, "task_loss": 0.19170710444450378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778622650491777, "compression_loss": 0.0, "distillation_loss": 0.04718421772122383, "epoch": 3.16, "learning_rate": 2.8815590008404293e-05, "loss": 0.0442, "step": 3329, "task_loss": 0.017278321087360382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778752949104976, "compression_loss": 0.0, "distillation_loss": 0.06321703642606735, "epoch": 3.16, "learning_rate": 2.8805059249121874e-05, "loss": 0.0583, "step": 3330, "task_loss": 0.013739963993430138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7778883196580573, "compression_loss": 0.0, "distillation_loss": 0.17093795537948608, "epoch": 3.16, "learning_rate": 2.8794527798632117e-05, "loss": 0.1645, "step": 3331, "task_loss": 0.10624982416629791 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7779013392928603, "compression_loss": 0.0, "distillation_loss": 0.09663266688585281, "epoch": 3.16, "learning_rate": 2.8783995658848105e-05, "loss": 0.1007, "step": 3332, "task_loss": 0.13778991997241974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7779143538159106, "compression_loss": 0.0, "distillation_loss": 0.12523743510246277, "epoch": 3.17, "learning_rate": 2.877346283168306e-05, "loss": 0.1164, "step": 3333, "task_loss": 0.03714917227625847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7779273632282115, "compression_loss": 0.0, "distillation_loss": 0.1300932914018631, "epoch": 3.17, "learning_rate": 2.876292931905032e-05, "loss": 0.1346, "step": 3334, "task_loss": 0.17540670931339264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7779403675307669, "compression_loss": 0.0, "distillation_loss": 0.16550663113594055, "epoch": 3.17, "learning_rate": 2.875239512286335e-05, "loss": 0.17, "step": 3335, "task_loss": 0.21077799797058105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7779533667245805, "compression_loss": 0.0, "distillation_loss": 0.10607212781906128, "epoch": 3.17, "learning_rate": 2.8741860245035722e-05, "loss": 0.0985, "step": 3336, "task_loss": 0.030357446521520615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777966360810656, "compression_loss": 0.0, "distillation_loss": 0.0917745977640152, "epoch": 3.17, "learning_rate": 2.8731324687481176e-05, "loss": 0.0947, "step": 3337, "task_loss": 0.12098343670368195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7779793497899968, "compression_loss": 0.0, "distillation_loss": 0.12411828339099884, "epoch": 3.17, "learning_rate": 2.8720788452113517e-05, "loss": 0.115, "step": 3338, "task_loss": 0.03254596143960953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.777992333663607, "compression_loss": 0.0, "distillation_loss": 0.12530258297920227, "epoch": 3.17, "learning_rate": 2.8710251540846723e-05, "loss": 0.1194, "step": 3339, "task_loss": 0.06607921421527863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780053124324899, "compression_loss": 0.0, "distillation_loss": 0.10954394936561584, "epoch": 3.17, "learning_rate": 2.8699713955594864e-05, "loss": 0.1203, "step": 3340, "task_loss": 0.21743568778038025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780182860976496, "compression_loss": 0.0, "distillation_loss": 0.047306716442108154, "epoch": 3.17, "learning_rate": 2.8689175698272147e-05, "loss": 0.045, "step": 3341, "task_loss": 0.02468855120241642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780312546600893, "compression_loss": 0.0, "distillation_loss": 0.09475719928741455, "epoch": 3.17, "learning_rate": 2.8678636770792906e-05, "loss": 0.0905, "step": 3342, "task_loss": 0.052515700459480286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778044218120813, "compression_loss": 0.0, "distillation_loss": 0.1709882766008377, "epoch": 3.17, "learning_rate": 2.8668097175071572e-05, "loss": 0.1834, "step": 3343, "task_loss": 0.29515278339385986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780571764808243, "compression_loss": 0.0, "distillation_loss": 0.04090896621346474, "epoch": 3.18, "learning_rate": 2.865755691302272e-05, "loss": 0.0383, "step": 3344, "task_loss": 0.015103261917829514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780701297411269, "compression_loss": 0.0, "distillation_loss": 0.07005221396684647, "epoch": 3.18, "learning_rate": 2.864701598656104e-05, "loss": 0.0843, "step": 3345, "task_loss": 0.21217550337314606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780830779027245, "compression_loss": 0.0, "distillation_loss": 0.030060699209570885, "epoch": 3.18, "learning_rate": 2.8636474397601343e-05, "loss": 0.0275, "step": 3346, "task_loss": 0.00450095534324646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7780960209666207, "compression_loss": 0.0, "distillation_loss": 0.2950228452682495, "epoch": 3.18, "learning_rate": 2.862593214805856e-05, "loss": 0.2962, "step": 3347, "task_loss": 0.30634093284606934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781089589338193, "compression_loss": 0.0, "distillation_loss": 0.0678018108010292, "epoch": 3.18, "learning_rate": 2.8615389239847734e-05, "loss": 0.0648, "step": 3348, "task_loss": 0.03745712339878082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781218918053238, "compression_loss": 0.0, "distillation_loss": 0.1801690310239792, "epoch": 3.18, "learning_rate": 2.8604845674884045e-05, "loss": 0.1866, "step": 3349, "task_loss": 0.24447283148765564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778134819582138, "compression_loss": 0.0, "distillation_loss": 0.05510277673602104, "epoch": 3.18, "learning_rate": 2.8594301455082777e-05, "loss": 0.0609, "step": 3350, "task_loss": 0.11345023661851883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781477422652656, "compression_loss": 0.0, "distillation_loss": 0.1684875637292862, "epoch": 3.18, "learning_rate": 2.8583756582359338e-05, "loss": 0.1605, "step": 3351, "task_loss": 0.08910365402698517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781606598557104, "compression_loss": 0.0, "distillation_loss": 0.13622252643108368, "epoch": 3.18, "learning_rate": 2.8573211058629262e-05, "loss": 0.1293, "step": 3352, "task_loss": 0.06717808544635773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781735723544758, "compression_loss": 0.0, "distillation_loss": 0.03606577590107918, "epoch": 3.18, "learning_rate": 2.8562664885808176e-05, "loss": 0.0345, "step": 3353, "task_loss": 0.020131606608629227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781864797625656, "compression_loss": 0.0, "distillation_loss": 0.07601951062679291, "epoch": 3.19, "learning_rate": 2.8552118065811868e-05, "loss": 0.086, "step": 3354, "task_loss": 0.17620819807052612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7781993820809836, "compression_loss": 0.0, "distillation_loss": 0.032324016094207764, "epoch": 3.19, "learning_rate": 2.85415706005562e-05, "loss": 0.0297, "step": 3355, "task_loss": 0.0057275425642728806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7782122793107333, "compression_loss": 0.0, "distillation_loss": 0.08825677633285522, "epoch": 3.19, "learning_rate": 2.8531022491957178e-05, "loss": 0.0971, "step": 3356, "task_loss": 0.17651137709617615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7782251714528186, "compression_loss": 0.0, "distillation_loss": 0.05497463047504425, "epoch": 3.19, "learning_rate": 2.852047374193092e-05, "loss": 0.0514, "step": 3357, "task_loss": 0.019507795572280884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778238058508243, "compression_loss": 0.0, "distillation_loss": 0.13246670365333557, "epoch": 3.19, "learning_rate": 2.850992435239364e-05, "loss": 0.1283, "step": 3358, "task_loss": 0.0911954715847969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7782509404780101, "compression_loss": 0.0, "distillation_loss": 0.02414816804230213, "epoch": 3.19, "learning_rate": 2.8499374325261708e-05, "loss": 0.03, "step": 3359, "task_loss": 0.08249876648187637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7782638173631239, "compression_loss": 0.0, "distillation_loss": 0.041819360107183456, "epoch": 3.19, "learning_rate": 2.848882366245157e-05, "loss": 0.0381, "step": 3360, "task_loss": 0.004362896084785461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7782766891645878, "compression_loss": 0.0, "distillation_loss": 0.08119907230138779, "epoch": 3.19, "learning_rate": 2.847827236587982e-05, "loss": 0.092, "step": 3361, "task_loss": 0.18970143795013428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7782895558834056, "compression_loss": 0.0, "distillation_loss": 0.05240718275308609, "epoch": 3.19, "learning_rate": 2.846772043746313e-05, "loss": 0.0514, "step": 3362, "task_loss": 0.04256965219974518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778302417520581, "compression_loss": 0.0, "distillation_loss": 0.1657370626926422, "epoch": 3.19, "learning_rate": 2.845716787911833e-05, "loss": 0.1659, "step": 3363, "task_loss": 0.16752569377422333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783152740771176, "compression_loss": 0.0, "distillation_loss": 0.06295058131217957, "epoch": 3.19, "learning_rate": 2.8446614692762336e-05, "loss": 0.0575, "step": 3364, "task_loss": 0.008071176707744598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783281255540192, "compression_loss": 0.0, "distillation_loss": 0.147892564535141, "epoch": 3.2, "learning_rate": 2.843606088031218e-05, "loss": 0.1436, "step": 3365, "task_loss": 0.10463625937700272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783409719522894, "compression_loss": 0.0, "distillation_loss": 0.02701820805668831, "epoch": 3.2, "learning_rate": 2.842550644368502e-05, "loss": 0.0337, "step": 3366, "task_loss": 0.09333845973014832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783538132729317, "compression_loss": 0.0, "distillation_loss": 0.024246441200375557, "epoch": 3.2, "learning_rate": 2.841495138479811e-05, "loss": 0.0222, "step": 3367, "task_loss": 0.003893321380019188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783666495169501, "compression_loss": 0.0, "distillation_loss": 0.16022557020187378, "epoch": 3.2, "learning_rate": 2.8404395705568848e-05, "loss": 0.1523, "step": 3368, "task_loss": 0.08084793388843536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783794806853482, "compression_loss": 0.0, "distillation_loss": 0.03452833741903305, "epoch": 3.2, "learning_rate": 2.8393839407914702e-05, "loss": 0.0374, "step": 3369, "task_loss": 0.06310579180717468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7783923067791296, "compression_loss": 0.0, "distillation_loss": 0.11381056159734726, "epoch": 3.2, "learning_rate": 2.8383282493753283e-05, "loss": 0.1112, "step": 3370, "task_loss": 0.08721385896205902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778405127799298, "compression_loss": 0.0, "distillation_loss": 0.11384284496307373, "epoch": 3.2, "learning_rate": 2.83727249650023e-05, "loss": 0.1135, "step": 3371, "task_loss": 0.11086120456457138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784179437468572, "compression_loss": 0.0, "distillation_loss": 0.04285869747400284, "epoch": 3.2, "learning_rate": 2.836216682357959e-05, "loss": 0.063, "step": 3372, "task_loss": 0.24387109279632568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784307546228107, "compression_loss": 0.0, "distillation_loss": 0.10957767069339752, "epoch": 3.2, "learning_rate": 2.8351608071403085e-05, "loss": 0.1169, "step": 3373, "task_loss": 0.18312203884124756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784435604281623, "compression_loss": 0.0, "distillation_loss": 0.06287790834903717, "epoch": 3.2, "learning_rate": 2.8341048710390832e-05, "loss": 0.0677, "step": 3374, "task_loss": 0.11077691614627838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784563611639156, "compression_loss": 0.0, "distillation_loss": 0.035941578447818756, "epoch": 3.21, "learning_rate": 2.8330488742460987e-05, "loss": 0.033, "step": 3375, "task_loss": 0.006380628794431686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784691568310744, "compression_loss": 0.0, "distillation_loss": 0.14878231287002563, "epoch": 3.21, "learning_rate": 2.8319928169531825e-05, "loss": 0.1419, "step": 3376, "task_loss": 0.08023767173290253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784819474306423, "compression_loss": 0.0, "distillation_loss": 0.15483559668064117, "epoch": 3.21, "learning_rate": 2.830936699352172e-05, "loss": 0.1589, "step": 3377, "task_loss": 0.1951579451560974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7784947329636229, "compression_loss": 0.0, "distillation_loss": 0.22917942702770233, "epoch": 3.21, "learning_rate": 2.8298805216349167e-05, "loss": 0.215, "step": 3378, "task_loss": 0.08718053251504898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785075134310201, "compression_loss": 0.0, "distillation_loss": 0.266732782125473, "epoch": 3.21, "learning_rate": 2.8288242839932744e-05, "loss": 0.2629, "step": 3379, "task_loss": 0.2285778522491455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785202888338374, "compression_loss": 0.0, "distillation_loss": 0.09135205298662186, "epoch": 3.21, "learning_rate": 2.8277679866191194e-05, "loss": 0.1011, "step": 3380, "task_loss": 0.18895608186721802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785330591730785, "compression_loss": 0.0, "distillation_loss": 0.10083561390638351, "epoch": 3.21, "learning_rate": 2.8267116297043294e-05, "loss": 0.1137, "step": 3381, "task_loss": 0.22926008701324463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785458244497472, "compression_loss": 0.0, "distillation_loss": 0.020816469565033913, "epoch": 3.21, "learning_rate": 2.8256552134407993e-05, "loss": 0.0192, "step": 3382, "task_loss": 0.004540523514151573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785585846648471, "compression_loss": 0.0, "distillation_loss": 0.05450872331857681, "epoch": 3.21, "learning_rate": 2.8245987380204313e-05, "loss": 0.0556, "step": 3383, "task_loss": 0.0653524175286293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785713398193818, "compression_loss": 0.0, "distillation_loss": 0.05758683755993843, "epoch": 3.21, "learning_rate": 2.8235422036351382e-05, "loss": 0.0611, "step": 3384, "task_loss": 0.09230072051286697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785840899143551, "compression_loss": 0.0, "distillation_loss": 0.1324225813150406, "epoch": 3.21, "learning_rate": 2.822485610476847e-05, "loss": 0.1397, "step": 3385, "task_loss": 0.2054787278175354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7785968349507707, "compression_loss": 0.0, "distillation_loss": 0.10634157061576843, "epoch": 3.22, "learning_rate": 2.8214289587374908e-05, "loss": 0.1103, "step": 3386, "task_loss": 0.14617042243480682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786095749296322, "compression_loss": 0.0, "distillation_loss": 0.1770530492067337, "epoch": 3.22, "learning_rate": 2.8203722486090168e-05, "loss": 0.1791, "step": 3387, "task_loss": 0.19720126688480377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786223098519434, "compression_loss": 0.0, "distillation_loss": 0.27764710783958435, "epoch": 3.22, "learning_rate": 2.8193154802833803e-05, "loss": 0.2611, "step": 3388, "task_loss": 0.11260776221752167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786350397187077, "compression_loss": 0.0, "distillation_loss": 0.052394554018974304, "epoch": 3.22, "learning_rate": 2.818258653952549e-05, "loss": 0.0817, "step": 3389, "task_loss": 0.3452882766723633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786477645309291, "compression_loss": 0.0, "distillation_loss": 0.023975854739546776, "epoch": 3.22, "learning_rate": 2.8172017698085013e-05, "loss": 0.0298, "step": 3390, "task_loss": 0.08264005184173584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786604842896112, "compression_loss": 0.0, "distillation_loss": 0.16987451910972595, "epoch": 3.22, "learning_rate": 2.816144828043224e-05, "loss": 0.1559, "step": 3391, "task_loss": 0.030469371005892754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786731989957576, "compression_loss": 0.0, "distillation_loss": 0.08702673763036728, "epoch": 3.22, "learning_rate": 2.8150878288487155e-05, "loss": 0.0822, "step": 3392, "task_loss": 0.03916516155004501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778685908650372, "compression_loss": 0.0, "distillation_loss": 0.030004359781742096, "epoch": 3.22, "learning_rate": 2.8140307724169857e-05, "loss": 0.0351, "step": 3393, "task_loss": 0.08116083592176437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7786986132544582, "compression_loss": 0.0, "distillation_loss": 0.18179570138454437, "epoch": 3.22, "learning_rate": 2.812973658940054e-05, "loss": 0.199, "step": 3394, "task_loss": 0.3536381423473358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787113128090197, "compression_loss": 0.0, "distillation_loss": 0.1652180403470993, "epoch": 3.22, "learning_rate": 2.8119164886099504e-05, "loss": 0.1694, "step": 3395, "task_loss": 0.2065887749195099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787240073150603, "compression_loss": 0.0, "distillation_loss": 0.10243000090122223, "epoch": 3.23, "learning_rate": 2.8108592616187133e-05, "loss": 0.1072, "step": 3396, "task_loss": 0.15036579966545105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787366967735837, "compression_loss": 0.0, "distillation_loss": 0.12697230279445648, "epoch": 3.23, "learning_rate": 2.8098019781583944e-05, "loss": 0.1212, "step": 3397, "task_loss": 0.06895315647125244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787493811855934, "compression_loss": 0.0, "distillation_loss": 0.046805739402770996, "epoch": 3.23, "learning_rate": 2.8087446384210547e-05, "loss": 0.0473, "step": 3398, "task_loss": 0.05141756683588028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787620605520934, "compression_loss": 0.0, "distillation_loss": 0.024082593619823456, "epoch": 3.23, "learning_rate": 2.8076872425987637e-05, "loss": 0.0225, "step": 3399, "task_loss": 0.00831957720220089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787747348740871, "compression_loss": 0.0, "distillation_loss": 0.14285123348236084, "epoch": 3.23, "learning_rate": 2.8066297908836043e-05, "loss": 0.1427, "step": 3400, "task_loss": 0.14182282984256744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7787874041525783, "compression_loss": 0.0, "distillation_loss": 0.08242295682430267, "epoch": 3.23, "learning_rate": 2.8055722834676658e-05, "loss": 0.0958, "step": 3401, "task_loss": 0.21660152077674866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788000683885706, "compression_loss": 0.0, "distillation_loss": 0.038724031299352646, "epoch": 3.23, "learning_rate": 2.804514720543051e-05, "loss": 0.0355, "step": 3402, "task_loss": 0.006068244576454163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788127275830679, "compression_loss": 0.0, "distillation_loss": 0.08571316301822662, "epoch": 3.23, "learning_rate": 2.80345710230187e-05, "loss": 0.0914, "step": 3403, "task_loss": 0.14219190180301666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788253817370736, "compression_loss": 0.0, "distillation_loss": 0.07485407590866089, "epoch": 3.23, "learning_rate": 2.802399428936246e-05, "loss": 0.0825, "step": 3404, "task_loss": 0.15130427479743958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788380308515915, "compression_loss": 0.0, "distillation_loss": 0.09159151464700699, "epoch": 3.23, "learning_rate": 2.8013417006383076e-05, "loss": 0.09, "step": 3405, "task_loss": 0.07594650983810425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788506749276254, "compression_loss": 0.0, "distillation_loss": 0.0715874582529068, "epoch": 3.23, "learning_rate": 2.8002839176001987e-05, "loss": 0.0843, "step": 3406, "task_loss": 0.19857263565063477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788633139661788, "compression_loss": 0.0, "distillation_loss": 0.13416993618011475, "epoch": 3.24, "learning_rate": 2.799226080014071e-05, "loss": 0.1421, "step": 3407, "task_loss": 0.21369251608848572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7788759479682554, "compression_loss": 0.0, "distillation_loss": 0.09951282292604446, "epoch": 3.24, "learning_rate": 2.7981681880720838e-05, "loss": 0.0955, "step": 3408, "task_loss": 0.058943700045347214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778888576934859, "compression_loss": 0.0, "distillation_loss": 0.3170393109321594, "epoch": 3.24, "learning_rate": 2.7971102419664103e-05, "loss": 0.3122, "step": 3409, "task_loss": 0.2684779763221741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789012008669933, "compression_loss": 0.0, "distillation_loss": 0.04012390226125717, "epoch": 3.24, "learning_rate": 2.7960522418892288e-05, "loss": 0.0552, "step": 3410, "task_loss": 0.1905803382396698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789138197656618, "compression_loss": 0.0, "distillation_loss": 0.08090061694383621, "epoch": 3.24, "learning_rate": 2.794994188032733e-05, "loss": 0.0841, "step": 3411, "task_loss": 0.11296429485082626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789264336318683, "compression_loss": 0.0, "distillation_loss": 0.08891788125038147, "epoch": 3.24, "learning_rate": 2.7939360805891218e-05, "loss": 0.0852, "step": 3412, "task_loss": 0.05205078423023224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789390424666165, "compression_loss": 0.0, "distillation_loss": 0.13765235245227814, "epoch": 3.24, "learning_rate": 2.7928779197506056e-05, "loss": 0.1313, "step": 3413, "task_loss": 0.07377782464027405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789516462709101, "compression_loss": 0.0, "distillation_loss": 0.0779227688908577, "epoch": 3.24, "learning_rate": 2.7918197057094054e-05, "loss": 0.0794, "step": 3414, "task_loss": 0.09290515631437302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789642450457527, "compression_loss": 0.0, "distillation_loss": 0.15904906392097473, "epoch": 3.24, "learning_rate": 2.7907614386577497e-05, "loss": 0.1513, "step": 3415, "task_loss": 0.08202332258224487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.778976838792148, "compression_loss": 0.0, "distillation_loss": 0.05591778829693794, "epoch": 3.24, "learning_rate": 2.789703118787879e-05, "loss": 0.066, "step": 3416, "task_loss": 0.15640464425086975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7789894275110996, "compression_loss": 0.0, "distillation_loss": 0.2858087122440338, "epoch": 3.25, "learning_rate": 2.7886447462920412e-05, "loss": 0.2745, "step": 3417, "task_loss": 0.17287422716617584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790020112036113, "compression_loss": 0.0, "distillation_loss": 0.05627952516078949, "epoch": 3.25, "learning_rate": 2.787586321362495e-05, "loss": 0.0517, "step": 3418, "task_loss": 0.010653091594576836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790145898706868, "compression_loss": 0.0, "distillation_loss": 0.15222644805908203, "epoch": 3.25, "learning_rate": 2.7865278441915082e-05, "loss": 0.1488, "step": 3419, "task_loss": 0.11802099645137787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790271635133298, "compression_loss": 0.0, "distillation_loss": 0.05383795499801636, "epoch": 3.25, "learning_rate": 2.785469314971359e-05, "loss": 0.0516, "step": 3420, "task_loss": 0.03138484060764313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779039732132544, "compression_loss": 0.0, "distillation_loss": 0.03762197121977806, "epoch": 3.25, "learning_rate": 2.7844107338943343e-05, "loss": 0.047, "step": 3421, "task_loss": 0.13129746913909912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790522957293328, "compression_loss": 0.0, "distillation_loss": 0.13689181208610535, "epoch": 3.25, "learning_rate": 2.7833521011527293e-05, "loss": 0.1453, "step": 3422, "task_loss": 0.221421480178833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790648543047002, "compression_loss": 0.0, "distillation_loss": 0.2846853733062744, "epoch": 3.25, "learning_rate": 2.782293416938851e-05, "loss": 0.2751, "step": 3423, "task_loss": 0.18841396272182465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790774078596497, "compression_loss": 0.0, "distillation_loss": 0.05968084558844566, "epoch": 3.25, "learning_rate": 2.7812346814450135e-05, "loss": 0.078, "step": 3424, "task_loss": 0.2424488067626953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7790899563951851, "compression_loss": 0.0, "distillation_loss": 0.08280298113822937, "epoch": 3.25, "learning_rate": 2.7801758948635414e-05, "loss": 0.0881, "step": 3425, "task_loss": 0.13536496460437775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791024999123101, "compression_loss": 0.0, "distillation_loss": 0.11989252269268036, "epoch": 3.25, "learning_rate": 2.77911705738677e-05, "loss": 0.1164, "step": 3426, "task_loss": 0.08493325114250183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791150384120282, "compression_loss": 0.0, "distillation_loss": 0.1451410949230194, "epoch": 3.25, "learning_rate": 2.7780581692070395e-05, "loss": 0.1492, "step": 3427, "task_loss": 0.18578004837036133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791275718953432, "compression_loss": 0.0, "distillation_loss": 0.16686482727527618, "epoch": 3.26, "learning_rate": 2.7769992305167043e-05, "loss": 0.1621, "step": 3428, "task_loss": 0.11886778473854065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791401003632589, "compression_loss": 0.0, "distillation_loss": 0.057650670409202576, "epoch": 3.26, "learning_rate": 2.775940241508124e-05, "loss": 0.057, "step": 3429, "task_loss": 0.05127601698040962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791526238167787, "compression_loss": 0.0, "distillation_loss": 0.053712643682956696, "epoch": 3.26, "learning_rate": 2.774881202373671e-05, "loss": 0.064, "step": 3430, "task_loss": 0.1569092869758606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791651422569066, "compression_loss": 0.0, "distillation_loss": 0.04081248864531517, "epoch": 3.26, "learning_rate": 2.773822113305723e-05, "loss": 0.0428, "step": 3431, "task_loss": 0.061016641557216644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791776556846461, "compression_loss": 0.0, "distillation_loss": 0.06572188436985016, "epoch": 3.26, "learning_rate": 2.7727629744966695e-05, "loss": 0.067, "step": 3432, "task_loss": 0.07893720269203186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7791901641010008, "compression_loss": 0.0, "distillation_loss": 0.02726941555738449, "epoch": 3.26, "learning_rate": 2.7717037861389082e-05, "loss": 0.0256, "step": 3433, "task_loss": 0.010105656459927559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792026675069746, "compression_loss": 0.0, "distillation_loss": 0.030473362654447556, "epoch": 3.26, "learning_rate": 2.7706445484248454e-05, "loss": 0.0291, "step": 3434, "task_loss": 0.017233194783329964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792151659035711, "compression_loss": 0.0, "distillation_loss": 0.2087365984916687, "epoch": 3.26, "learning_rate": 2.769585261546897e-05, "loss": 0.2018, "step": 3435, "task_loss": 0.13944987952709198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792276592917939, "compression_loss": 0.0, "distillation_loss": 0.04427679255604744, "epoch": 3.26, "learning_rate": 2.768525925697487e-05, "loss": 0.0431, "step": 3436, "task_loss": 0.03255880996584892 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792401476726467, "compression_loss": 0.0, "distillation_loss": 0.10959358513355255, "epoch": 3.26, "learning_rate": 2.76746654106905e-05, "loss": 0.1043, "step": 3437, "task_loss": 0.05700678750872612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792526310471333, "compression_loss": 0.0, "distillation_loss": 0.18829983472824097, "epoch": 3.26, "learning_rate": 2.7664071078540282e-05, "loss": 0.1821, "step": 3438, "task_loss": 0.12653696537017822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792651094162573, "compression_loss": 0.0, "distillation_loss": 0.12829965353012085, "epoch": 3.27, "learning_rate": 2.7653476262448713e-05, "loss": 0.1339, "step": 3439, "task_loss": 0.18381281197071075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792775827810223, "compression_loss": 0.0, "distillation_loss": 0.06626725196838379, "epoch": 3.27, "learning_rate": 2.76428809643404e-05, "loss": 0.062, "step": 3440, "task_loss": 0.023818595334887505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7792900511424321, "compression_loss": 0.0, "distillation_loss": 0.03690072149038315, "epoch": 3.27, "learning_rate": 2.763228518614004e-05, "loss": 0.0498, "step": 3441, "task_loss": 0.16629740595817566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793025145014904, "compression_loss": 0.0, "distillation_loss": 0.04804535210132599, "epoch": 3.27, "learning_rate": 2.7621688929772393e-05, "loss": 0.0533, "step": 3442, "task_loss": 0.10066729038953781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793149728592008, "compression_loss": 0.0, "distillation_loss": 0.1166885569691658, "epoch": 3.27, "learning_rate": 2.761109219716233e-05, "loss": 0.1143, "step": 3443, "task_loss": 0.09277547895908356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793274262165669, "compression_loss": 0.0, "distillation_loss": 0.10858124494552612, "epoch": 3.27, "learning_rate": 2.760049499023479e-05, "loss": 0.113, "step": 3444, "task_loss": 0.15251390635967255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793398745745926, "compression_loss": 0.0, "distillation_loss": 0.03859622776508331, "epoch": 3.27, "learning_rate": 2.7589897310914814e-05, "loss": 0.0443, "step": 3445, "task_loss": 0.09562104940414429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793523179342815, "compression_loss": 0.0, "distillation_loss": 0.030183183029294014, "epoch": 3.27, "learning_rate": 2.7579299161127513e-05, "loss": 0.0287, "step": 3446, "task_loss": 0.01539078913629055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793647562966373, "compression_loss": 0.0, "distillation_loss": 0.06578610837459564, "epoch": 3.27, "learning_rate": 2.756870054279811e-05, "loss": 0.0839, "step": 3447, "task_loss": 0.24706564843654633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7793771896626636, "compression_loss": 0.0, "distillation_loss": 0.2527296543121338, "epoch": 3.27, "learning_rate": 2.755810145785187e-05, "loss": 0.2617, "step": 3448, "task_loss": 0.3425305485725403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779389618033364, "compression_loss": 0.0, "distillation_loss": 0.10953805595636368, "epoch": 3.28, "learning_rate": 2.754750190821418e-05, "loss": 0.1019, "step": 3449, "task_loss": 0.03293545916676521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794020414097425, "compression_loss": 0.0, "distillation_loss": 0.2223641574382782, "epoch": 3.28, "learning_rate": 2.753690189581051e-05, "loss": 0.2129, "step": 3450, "task_loss": 0.12748362123966217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794144597928024, "compression_loss": 0.0, "distillation_loss": 0.07554468512535095, "epoch": 3.28, "learning_rate": 2.752630142256638e-05, "loss": 0.0726, "step": 3451, "task_loss": 0.045700106769800186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794268731835476, "compression_loss": 0.0, "distillation_loss": 0.0460401251912117, "epoch": 3.28, "learning_rate": 2.7515700490407443e-05, "loss": 0.0571, "step": 3452, "task_loss": 0.15684375166893005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794392815829819, "compression_loss": 0.0, "distillation_loss": 0.07692290842533112, "epoch": 3.28, "learning_rate": 2.7505099101259386e-05, "loss": 0.0725, "step": 3453, "task_loss": 0.03274589031934738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794516849921087, "compression_loss": 0.0, "distillation_loss": 0.1473102867603302, "epoch": 3.28, "learning_rate": 2.749449725704802e-05, "loss": 0.1434, "step": 3454, "task_loss": 0.1077522411942482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794640834119319, "compression_loss": 0.0, "distillation_loss": 0.10021187365055084, "epoch": 3.28, "learning_rate": 2.748389495969921e-05, "loss": 0.1095, "step": 3455, "task_loss": 0.19332432746887207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779476476843455, "compression_loss": 0.0, "distillation_loss": 0.03207477182149887, "epoch": 3.28, "learning_rate": 2.747329221113891e-05, "loss": 0.0303, "step": 3456, "task_loss": 0.01403326727449894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7794888652876819, "compression_loss": 0.0, "distillation_loss": 0.1490982323884964, "epoch": 3.28, "learning_rate": 2.7462689013293176e-05, "loss": 0.1534, "step": 3457, "task_loss": 0.1916988492012024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795012487456161, "compression_loss": 0.0, "distillation_loss": 0.07416212558746338, "epoch": 3.28, "learning_rate": 2.745208536808812e-05, "loss": 0.0814, "step": 3458, "task_loss": 0.14629532396793365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795136272182613, "compression_loss": 0.0, "distillation_loss": 0.3036563992500305, "epoch": 3.28, "learning_rate": 2.7441481277449954e-05, "loss": 0.2975, "step": 3459, "task_loss": 0.24229754507541656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795260007066213, "compression_loss": 0.0, "distillation_loss": 0.05779052898287773, "epoch": 3.29, "learning_rate": 2.743087674330495e-05, "loss": 0.0536, "step": 3460, "task_loss": 0.016016999259591103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795383692116996, "compression_loss": 0.0, "distillation_loss": 0.05794607847929001, "epoch": 3.29, "learning_rate": 2.742027176757948e-05, "loss": 0.054, "step": 3461, "task_loss": 0.018953843042254448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795507327345002, "compression_loss": 0.0, "distillation_loss": 0.13176500797271729, "epoch": 3.29, "learning_rate": 2.7409666352199986e-05, "loss": 0.1273, "step": 3462, "task_loss": 0.08700872212648392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795630912760264, "compression_loss": 0.0, "distillation_loss": 0.03117290511727333, "epoch": 3.29, "learning_rate": 2.7399060499092992e-05, "loss": 0.0383, "step": 3463, "task_loss": 0.10199519991874695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779575444837282, "compression_loss": 0.0, "distillation_loss": 0.03322270140051842, "epoch": 3.29, "learning_rate": 2.7388454210185115e-05, "loss": 0.0369, "step": 3464, "task_loss": 0.0694967657327652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7795877934192709, "compression_loss": 0.0, "distillation_loss": 0.06944756209850311, "epoch": 3.29, "learning_rate": 2.7377847487403018e-05, "loss": 0.0634, "step": 3465, "task_loss": 0.008718544617295265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7796001370229966, "compression_loss": 0.0, "distillation_loss": 0.07945192605257034, "epoch": 3.29, "learning_rate": 2.736724033267347e-05, "loss": 0.0837, "step": 3466, "task_loss": 0.12163371592760086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7796124756494627, "compression_loss": 0.0, "distillation_loss": 0.04697978496551514, "epoch": 3.29, "learning_rate": 2.7356632747923322e-05, "loss": 0.0436, "step": 3467, "task_loss": 0.013647403568029404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779624809299673, "compression_loss": 0.0, "distillation_loss": 0.10399874299764633, "epoch": 3.29, "learning_rate": 2.7346024735079486e-05, "loss": 0.1006, "step": 3468, "task_loss": 0.06967251002788544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7796371379746312, "compression_loss": 0.0, "distillation_loss": 0.20231911540031433, "epoch": 3.29, "learning_rate": 2.7335416296068962e-05, "loss": 0.204, "step": 3469, "task_loss": 0.21951937675476074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779649461675341, "compression_loss": 0.0, "distillation_loss": 0.09952305257320404, "epoch": 3.3, "learning_rate": 2.7324807432818805e-05, "loss": 0.093, "step": 3470, "task_loss": 0.033969540148973465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7796617804028059, "compression_loss": 0.0, "distillation_loss": 0.2023647427558899, "epoch": 3.3, "learning_rate": 2.731419814725619e-05, "loss": 0.1988, "step": 3471, "task_loss": 0.16686657071113586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7796740941580298, "compression_loss": 0.0, "distillation_loss": 0.019200827926397324, "epoch": 3.3, "learning_rate": 2.730358844130834e-05, "loss": 0.0176, "step": 3472, "task_loss": 0.0035965926945209503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7796864029420163, "compression_loss": 0.0, "distillation_loss": 0.10850808024406433, "epoch": 3.3, "learning_rate": 2.729297831690255e-05, "loss": 0.1036, "step": 3473, "task_loss": 0.059668056666851044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779698706755769, "compression_loss": 0.0, "distillation_loss": 0.1171179711818695, "epoch": 3.3, "learning_rate": 2.728236777596621e-05, "loss": 0.1114, "step": 3474, "task_loss": 0.0599634051322937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797110056002917, "compression_loss": 0.0, "distillation_loss": 0.023795567452907562, "epoch": 3.3, "learning_rate": 2.7271756820426763e-05, "loss": 0.0218, "step": 3475, "task_loss": 0.003354804590344429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797232994765881, "compression_loss": 0.0, "distillation_loss": 0.05289144068956375, "epoch": 3.3, "learning_rate": 2.7261145452211763e-05, "loss": 0.0625, "step": 3476, "task_loss": 0.14865252375602722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797355883856617, "compression_loss": 0.0, "distillation_loss": 0.26715028285980225, "epoch": 3.3, "learning_rate": 2.725053367324879e-05, "loss": 0.271, "step": 3477, "task_loss": 0.3059850335121155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797478723285164, "compression_loss": 0.0, "distillation_loss": 0.03925115987658501, "epoch": 3.3, "learning_rate": 2.723992148546554e-05, "loss": 0.0448, "step": 3478, "task_loss": 0.0945422425866127 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797601513061558, "compression_loss": 0.0, "distillation_loss": 0.10821853578090668, "epoch": 3.3, "learning_rate": 2.7229308890789767e-05, "loss": 0.1106, "step": 3479, "task_loss": 0.13163337111473083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797724253195835, "compression_loss": 0.0, "distillation_loss": 0.026758499443531036, "epoch": 3.3, "learning_rate": 2.7218695891149293e-05, "loss": 0.0359, "step": 3480, "task_loss": 0.11779153347015381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797846943698032, "compression_loss": 0.0, "distillation_loss": 0.04860454425215721, "epoch": 3.31, "learning_rate": 2.720808248847203e-05, "loss": 0.0449, "step": 3481, "task_loss": 0.011549010872840881 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7797969584578188, "compression_loss": 0.0, "distillation_loss": 0.11690667271614075, "epoch": 3.31, "learning_rate": 2.719746868468595e-05, "loss": 0.1101, "step": 3482, "task_loss": 0.04849759489297867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798092175846337, "compression_loss": 0.0, "distillation_loss": 0.13424092531204224, "epoch": 3.31, "learning_rate": 2.7186854481719092e-05, "loss": 0.1333, "step": 3483, "task_loss": 0.12473595142364502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798214717512517, "compression_loss": 0.0, "distillation_loss": 0.07486072927713394, "epoch": 3.31, "learning_rate": 2.7176239881499595e-05, "loss": 0.0961, "step": 3484, "task_loss": 0.28720682859420776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798337209586765, "compression_loss": 0.0, "distillation_loss": 0.1800866276025772, "epoch": 3.31, "learning_rate": 2.716562488595563e-05, "loss": 0.1734, "step": 3485, "task_loss": 0.11275843530893326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798459652079118, "compression_loss": 0.0, "distillation_loss": 0.045273929834365845, "epoch": 3.31, "learning_rate": 2.715500949701549e-05, "loss": 0.0426, "step": 3486, "task_loss": 0.0184002872556448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798582044999612, "compression_loss": 0.0, "distillation_loss": 0.07528147101402283, "epoch": 3.31, "learning_rate": 2.7144393716607486e-05, "loss": 0.0723, "step": 3487, "task_loss": 0.04534187912940979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798704388358284, "compression_loss": 0.0, "distillation_loss": 0.1249753087759018, "epoch": 3.31, "learning_rate": 2.713377754666004e-05, "loss": 0.1209, "step": 3488, "task_loss": 0.08464540541172028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7798826682165172, "compression_loss": 0.0, "distillation_loss": 0.2507629990577698, "epoch": 3.31, "learning_rate": 2.712316098910162e-05, "loss": 0.2497, "step": 3489, "task_loss": 0.23983043432235718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.779894892643031, "compression_loss": 0.0, "distillation_loss": 0.017407327890396118, "epoch": 3.31, "learning_rate": 2.711254404586079e-05, "loss": 0.0162, "step": 3490, "task_loss": 0.004980321973562241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799071121163739, "compression_loss": 0.0, "distillation_loss": 0.16080042719841003, "epoch": 3.32, "learning_rate": 2.7101926718866156e-05, "loss": 0.1569, "step": 3491, "task_loss": 0.12228196114301682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799193266375491, "compression_loss": 0.0, "distillation_loss": 0.20948654413223267, "epoch": 3.32, "learning_rate": 2.7091309010046408e-05, "loss": 0.2093, "step": 3492, "task_loss": 0.2079431563615799 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799315362075607, "compression_loss": 0.0, "distillation_loss": 0.11191666126251221, "epoch": 3.32, "learning_rate": 2.708069092133031e-05, "loss": 0.1132, "step": 3493, "task_loss": 0.12430918961763382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799437408274122, "compression_loss": 0.0, "distillation_loss": 0.11181183904409409, "epoch": 3.32, "learning_rate": 2.7070072454646683e-05, "loss": 0.106, "step": 3494, "task_loss": 0.05379234626889229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799559404981073, "compression_loss": 0.0, "distillation_loss": 0.09605347365140915, "epoch": 3.32, "learning_rate": 2.7059453611924433e-05, "loss": 0.0923, "step": 3495, "task_loss": 0.058432966470718384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799681352206498, "compression_loss": 0.0, "distillation_loss": 0.06996072828769684, "epoch": 3.32, "learning_rate": 2.7048834395092505e-05, "loss": 0.0752, "step": 3496, "task_loss": 0.12205319851636887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799803249960431, "compression_loss": 0.0, "distillation_loss": 0.09135409444570541, "epoch": 3.32, "learning_rate": 2.7038214806079948e-05, "loss": 0.0947, "step": 3497, "task_loss": 0.12439997494220734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7799925098252911, "compression_loss": 0.0, "distillation_loss": 0.04232946038246155, "epoch": 3.32, "learning_rate": 2.702759484681585e-05, "loss": 0.0455, "step": 3498, "task_loss": 0.07386565208435059 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800046897093974, "compression_loss": 0.0, "distillation_loss": 0.06012481451034546, "epoch": 3.32, "learning_rate": 2.701697451922939e-05, "loss": 0.0557, "step": 3499, "task_loss": 0.015828527510166168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800168646493657, "compression_loss": 0.0, "distillation_loss": 0.13709309697151184, "epoch": 3.32, "learning_rate": 2.7006353825249792e-05, "loss": 0.1315, "step": 3500, "task_loss": 0.08080706745386124 }, { "epoch": 3.32, "eval_accuracy": 0.8967889908256881, "eval_loss": 0.41444137692451477, "eval_runtime": 18.5229, "eval_samples_per_second": 47.077, "eval_steps_per_second": 5.885, "step": 3500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800290346461998, "compression_loss": 0.0, "distillation_loss": 0.05133210867643356, "epoch": 3.32, "learning_rate": 2.6995732766806354e-05, "loss": 0.0478, "step": 3501, "task_loss": 0.015585673972964287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800411997009031, "compression_loss": 0.0, "distillation_loss": 0.04098585247993469, "epoch": 3.33, "learning_rate": 2.6985111345828452e-05, "loss": 0.0458, "step": 3502, "task_loss": 0.0887080505490303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800533598144795, "compression_loss": 0.0, "distillation_loss": 0.07880129665136337, "epoch": 3.33, "learning_rate": 2.6974489564245513e-05, "loss": 0.0739, "step": 3503, "task_loss": 0.030195550993084908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800655149879328, "compression_loss": 0.0, "distillation_loss": 0.07193966954946518, "epoch": 3.33, "learning_rate": 2.6963867423987032e-05, "loss": 0.0733, "step": 3504, "task_loss": 0.08535440266132355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7800776652222663, "compression_loss": 0.0, "distillation_loss": 0.23703370988368988, "epoch": 3.33, "learning_rate": 2.695324492698258e-05, "loss": 0.2278, "step": 3505, "task_loss": 0.14506091177463531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780089810518484, "compression_loss": 0.0, "distillation_loss": 0.27318769693374634, "epoch": 3.33, "learning_rate": 2.694262207516178e-05, "loss": 0.2656, "step": 3506, "task_loss": 0.1974327713251114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801019508775895, "compression_loss": 0.0, "distillation_loss": 0.03283765912055969, "epoch": 3.33, "learning_rate": 2.6931998870454327e-05, "loss": 0.0311, "step": 3507, "task_loss": 0.015567878261208534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801140863005865, "compression_loss": 0.0, "distillation_loss": 0.08824481815099716, "epoch": 3.33, "learning_rate": 2.692137531478997e-05, "loss": 0.0837, "step": 3508, "task_loss": 0.042351722717285156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801262167884786, "compression_loss": 0.0, "distillation_loss": 0.10943618416786194, "epoch": 3.33, "learning_rate": 2.6910751410098532e-05, "loss": 0.1129, "step": 3509, "task_loss": 0.14431238174438477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801383423422695, "compression_loss": 0.0, "distillation_loss": 0.10514090955257416, "epoch": 3.33, "learning_rate": 2.6900127158309903e-05, "loss": 0.1055, "step": 3510, "task_loss": 0.10892733186483383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780150462962963, "compression_loss": 0.0, "distillation_loss": 0.1573885828256607, "epoch": 3.33, "learning_rate": 2.688950256135402e-05, "loss": 0.1667, "step": 3511, "task_loss": 0.2509816586971283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801625786515627, "compression_loss": 0.0, "distillation_loss": 0.0467422753572464, "epoch": 3.34, "learning_rate": 2.6878877621160904e-05, "loss": 0.0526, "step": 3512, "task_loss": 0.10526955127716064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801746894090722, "compression_loss": 0.0, "distillation_loss": 0.0356249138712883, "epoch": 3.34, "learning_rate": 2.686825233966061e-05, "loss": 0.0428, "step": 3513, "task_loss": 0.10746340453624725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801867952364953, "compression_loss": 0.0, "distillation_loss": 0.07927364856004715, "epoch": 3.34, "learning_rate": 2.6857626718783285e-05, "loss": 0.0928, "step": 3514, "task_loss": 0.21453584730625153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7801988961348355, "compression_loss": 0.0, "distillation_loss": 0.21596963703632355, "epoch": 3.34, "learning_rate": 2.6847000760459118e-05, "loss": 0.2164, "step": 3515, "task_loss": 0.22040767967700958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802109921050968, "compression_loss": 0.0, "distillation_loss": 0.04428340867161751, "epoch": 3.34, "learning_rate": 2.683637446661837e-05, "loss": 0.0504, "step": 3516, "task_loss": 0.10571881383657455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802230831482826, "compression_loss": 0.0, "distillation_loss": 0.020135464146733284, "epoch": 3.34, "learning_rate": 2.6825747839191362e-05, "loss": 0.0284, "step": 3517, "task_loss": 0.10235545039176941 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802351692653968, "compression_loss": 0.0, "distillation_loss": 0.09116624295711517, "epoch": 3.34, "learning_rate": 2.681512088010845e-05, "loss": 0.0861, "step": 3518, "task_loss": 0.04042452201247215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802472504574429, "compression_loss": 0.0, "distillation_loss": 0.21908923983573914, "epoch": 3.34, "learning_rate": 2.6804493591300105e-05, "loss": 0.2111, "step": 3519, "task_loss": 0.13956592977046967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802593267254246, "compression_loss": 0.0, "distillation_loss": 0.09275214374065399, "epoch": 3.34, "learning_rate": 2.6793865974696803e-05, "loss": 0.0882, "step": 3520, "task_loss": 0.046760689467191696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802713980703457, "compression_loss": 0.0, "distillation_loss": 0.031631097197532654, "epoch": 3.34, "learning_rate": 2.67832380322291e-05, "loss": 0.0295, "step": 3521, "task_loss": 0.01028413511812687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802834644932098, "compression_loss": 0.0, "distillation_loss": 0.09307868778705597, "epoch": 3.34, "learning_rate": 2.6772609765827627e-05, "loss": 0.1006, "step": 3522, "task_loss": 0.16859915852546692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7802955259950206, "compression_loss": 0.0, "distillation_loss": 0.07208751142024994, "epoch": 3.35, "learning_rate": 2.6761981177423052e-05, "loss": 0.0748, "step": 3523, "task_loss": 0.09921170026063919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7803075825767818, "compression_loss": 0.0, "distillation_loss": 0.05329529941082001, "epoch": 3.35, "learning_rate": 2.6751352268946118e-05, "loss": 0.0492, "step": 3524, "task_loss": 0.012433096766471863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7803196342394971, "compression_loss": 0.0, "distillation_loss": 0.13451752066612244, "epoch": 3.35, "learning_rate": 2.6740723042327598e-05, "loss": 0.1327, "step": 3525, "task_loss": 0.11605449765920639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78033168098417, "compression_loss": 0.0, "distillation_loss": 0.08334813266992569, "epoch": 3.35, "learning_rate": 2.673009349949836e-05, "loss": 0.1004, "step": 3526, "task_loss": 0.2539750039577484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7803437228118045, "compression_loss": 0.0, "distillation_loss": 0.11693807691335678, "epoch": 3.35, "learning_rate": 2.6719463642389302e-05, "loss": 0.1105, "step": 3527, "task_loss": 0.05235512554645538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780355759723404, "compression_loss": 0.0, "distillation_loss": 0.04535358399152756, "epoch": 3.35, "learning_rate": 2.6708833472931394e-05, "loss": 0.0491, "step": 3528, "task_loss": 0.08303047716617584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7803677917199723, "compression_loss": 0.0, "distillation_loss": 0.058416105806827545, "epoch": 3.35, "learning_rate": 2.669820299305566e-05, "loss": 0.0618, "step": 3529, "task_loss": 0.09264393150806427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7803798188025132, "compression_loss": 0.0, "distillation_loss": 0.0770866721868515, "epoch": 3.35, "learning_rate": 2.6687572204693174e-05, "loss": 0.0906, "step": 3530, "task_loss": 0.2119910567998886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7803918409720301, "compression_loss": 0.0, "distillation_loss": 0.042178817093372345, "epoch": 3.35, "learning_rate": 2.667694110977506e-05, "loss": 0.0418, "step": 3531, "task_loss": 0.03879820555448532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7804038582295268, "compression_loss": 0.0, "distillation_loss": 0.11523336172103882, "epoch": 3.35, "learning_rate": 2.6666309710232522e-05, "loss": 0.1115, "step": 3532, "task_loss": 0.078341543674469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7804158705760071, "compression_loss": 0.0, "distillation_loss": 0.1296311616897583, "epoch": 3.36, "learning_rate": 2.6655678007996804e-05, "loss": 0.1225, "step": 3533, "task_loss": 0.057877760380506516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7804278780124746, "compression_loss": 0.0, "distillation_loss": 0.04751583933830261, "epoch": 3.36, "learning_rate": 2.66450460049992e-05, "loss": 0.0501, "step": 3534, "task_loss": 0.07348724454641342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7804398805399331, "compression_loss": 0.0, "distillation_loss": 0.032740626484155655, "epoch": 3.36, "learning_rate": 2.6634413703171058e-05, "loss": 0.0306, "step": 3535, "task_loss": 0.011288370937108994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780451878159386, "compression_loss": 0.0, "distillation_loss": 0.02693828195333481, "epoch": 3.36, "learning_rate": 2.6623781104443806e-05, "loss": 0.0299, "step": 3536, "task_loss": 0.05642160773277283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7804638708718372, "compression_loss": 0.0, "distillation_loss": 0.1141064316034317, "epoch": 3.36, "learning_rate": 2.6613148210748894e-05, "loss": 0.1129, "step": 3537, "task_loss": 0.10202029347419739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7804758586782903, "compression_loss": 0.0, "distillation_loss": 0.05455036461353302, "epoch": 3.36, "learning_rate": 2.6602515024017842e-05, "loss": 0.0595, "step": 3538, "task_loss": 0.1041443794965744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780487841579749, "compression_loss": 0.0, "distillation_loss": 0.03301164135336876, "epoch": 3.36, "learning_rate": 2.6591881546182216e-05, "loss": 0.0302, "step": 3539, "task_loss": 0.005180429667234421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780499819577217, "compression_loss": 0.0, "distillation_loss": 0.06954570859670639, "epoch": 3.36, "learning_rate": 2.6581247779173635e-05, "loss": 0.069, "step": 3540, "task_loss": 0.06367438286542892 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780511792671698, "compression_loss": 0.0, "distillation_loss": 0.02071313187479973, "epoch": 3.36, "learning_rate": 2.6570613724923788e-05, "loss": 0.019, "step": 3541, "task_loss": 0.003429897129535675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805237608641957, "compression_loss": 0.0, "distillation_loss": 0.01836605742573738, "epoch": 3.36, "learning_rate": 2.655997938536439e-05, "loss": 0.017, "step": 3542, "task_loss": 0.004780923947691917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805357241557137, "compression_loss": 0.0, "distillation_loss": 0.031529366970062256, "epoch": 3.36, "learning_rate": 2.654934476242723e-05, "loss": 0.0292, "step": 3543, "task_loss": 0.008400822058320045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805476825472557, "compression_loss": 0.0, "distillation_loss": 0.026692230254411697, "epoch": 3.37, "learning_rate": 2.653870985804412e-05, "loss": 0.0248, "step": 3544, "task_loss": 0.007382074370980263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805596360398255, "compression_loss": 0.0, "distillation_loss": 0.036252401769161224, "epoch": 3.37, "learning_rate": 2.6528074674146963e-05, "loss": 0.0462, "step": 3545, "task_loss": 0.13550357520580292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805715846344266, "compression_loss": 0.0, "distillation_loss": 0.031404148787260056, "epoch": 3.37, "learning_rate": 2.6517439212667677e-05, "loss": 0.0292, "step": 3546, "task_loss": 0.009732730686664581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805835283320628, "compression_loss": 0.0, "distillation_loss": 0.08225059509277344, "epoch": 3.37, "learning_rate": 2.6506803475538256e-05, "loss": 0.0909, "step": 3547, "task_loss": 0.16853067278862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7805954671337377, "compression_loss": 0.0, "distillation_loss": 0.028768369928002357, "epoch": 3.37, "learning_rate": 2.649616746469072e-05, "loss": 0.0268, "step": 3548, "task_loss": 0.009422983974218369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806074010404551, "compression_loss": 0.0, "distillation_loss": 0.11904192715883255, "epoch": 3.37, "learning_rate": 2.648553118205716e-05, "loss": 0.1248, "step": 3549, "task_loss": 0.17703823745250702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806193300532185, "compression_loss": 0.0, "distillation_loss": 0.03019685298204422, "epoch": 3.37, "learning_rate": 2.6474894629569713e-05, "loss": 0.0293, "step": 3550, "task_loss": 0.02082175202667713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806312541730318, "compression_loss": 0.0, "distillation_loss": 0.08796247839927673, "epoch": 3.37, "learning_rate": 2.6464257809160548e-05, "loss": 0.0828, "step": 3551, "task_loss": 0.03604867681860924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806431734008986, "compression_loss": 0.0, "distillation_loss": 0.03714694827795029, "epoch": 3.37, "learning_rate": 2.6453620722761896e-05, "loss": 0.0501, "step": 3552, "task_loss": 0.16653135418891907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806550877378224, "compression_loss": 0.0, "distillation_loss": 0.15399909019470215, "epoch": 3.37, "learning_rate": 2.6442983372306045e-05, "loss": 0.1543, "step": 3553, "task_loss": 0.15673311054706573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806669971848073, "compression_loss": 0.0, "distillation_loss": 0.23555459082126617, "epoch": 3.38, "learning_rate": 2.643234575972531e-05, "loss": 0.2352, "step": 3554, "task_loss": 0.2320120632648468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7806789017428565, "compression_loss": 0.0, "distillation_loss": 0.0971142053604126, "epoch": 3.38, "learning_rate": 2.642170788695208e-05, "loss": 0.0907, "step": 3555, "task_loss": 0.03279740735888481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780690801412974, "compression_loss": 0.0, "distillation_loss": 0.05912064015865326, "epoch": 3.38, "learning_rate": 2.6411069755918755e-05, "loss": 0.0546, "step": 3556, "task_loss": 0.013454478234052658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807026961961634, "compression_loss": 0.0, "distillation_loss": 0.15938124060630798, "epoch": 3.38, "learning_rate": 2.6400431368557815e-05, "loss": 0.1581, "step": 3557, "task_loss": 0.14631018042564392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807145860934284, "compression_loss": 0.0, "distillation_loss": 0.08248989284038544, "epoch": 3.38, "learning_rate": 2.6389792726801778e-05, "loss": 0.0976, "step": 3558, "task_loss": 0.23338665068149567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807264711057725, "compression_loss": 0.0, "distillation_loss": 0.03553071618080139, "epoch": 3.38, "learning_rate": 2.6379153832583186e-05, "loss": 0.0324, "step": 3559, "task_loss": 0.004168994724750519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807383512341997, "compression_loss": 0.0, "distillation_loss": 0.018167581409215927, "epoch": 3.38, "learning_rate": 2.6368514687834672e-05, "loss": 0.0167, "step": 3560, "task_loss": 0.003941915929317474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807502264797134, "compression_loss": 0.0, "distillation_loss": 0.09068135917186737, "epoch": 3.38, "learning_rate": 2.6357875294488865e-05, "loss": 0.0855, "step": 3561, "task_loss": 0.03913940489292145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807620968433175, "compression_loss": 0.0, "distillation_loss": 0.06539643555879593, "epoch": 3.38, "learning_rate": 2.6347235654478482e-05, "loss": 0.0653, "step": 3562, "task_loss": 0.06428472697734833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807739623260155, "compression_loss": 0.0, "distillation_loss": 0.09735129773616791, "epoch": 3.38, "learning_rate": 2.6336595769736245e-05, "loss": 0.0938, "step": 3563, "task_loss": 0.06144891679286957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807858229288113, "compression_loss": 0.0, "distillation_loss": 0.1435181051492691, "epoch": 3.38, "learning_rate": 2.6325955642194948e-05, "loss": 0.1363, "step": 3564, "task_loss": 0.07164686918258667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7807976786527082, "compression_loss": 0.0, "distillation_loss": 0.05075103044509888, "epoch": 3.39, "learning_rate": 2.6315315273787428e-05, "loss": 0.0474, "step": 3565, "task_loss": 0.01732024922966957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7808095294987103, "compression_loss": 0.0, "distillation_loss": 0.2770991325378418, "epoch": 3.39, "learning_rate": 2.630467466644655e-05, "loss": 0.2756, "step": 3566, "task_loss": 0.2625621557235718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7808213754678212, "compression_loss": 0.0, "distillation_loss": 0.13243553042411804, "epoch": 3.39, "learning_rate": 2.629403382210524e-05, "loss": 0.1435, "step": 3567, "task_loss": 0.24289308488368988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7808332165610443, "compression_loss": 0.0, "distillation_loss": 0.04136691614985466, "epoch": 3.39, "learning_rate": 2.628339274269645e-05, "loss": 0.0517, "step": 3568, "task_loss": 0.14519576728343964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7808450527793835, "compression_loss": 0.0, "distillation_loss": 0.014836801216006279, "epoch": 3.39, "learning_rate": 2.6272751430153186e-05, "loss": 0.0174, "step": 3569, "task_loss": 0.04046123847365379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7808568841238426, "compression_loss": 0.0, "distillation_loss": 0.11791656911373138, "epoch": 3.39, "learning_rate": 2.62621098864085e-05, "loss": 0.1122, "step": 3570, "task_loss": 0.06102790683507919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780868710595425, "compression_loss": 0.0, "distillation_loss": 0.026029329746961594, "epoch": 3.39, "learning_rate": 2.6251468113395465e-05, "loss": 0.0323, "step": 3571, "task_loss": 0.08885947614908218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7808805321951345, "compression_loss": 0.0, "distillation_loss": 0.07591719180345535, "epoch": 3.39, "learning_rate": 2.6240826113047235e-05, "loss": 0.0735, "step": 3572, "task_loss": 0.05175752192735672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.780892348923975, "compression_loss": 0.0, "distillation_loss": 0.030038170516490936, "epoch": 3.39, "learning_rate": 2.6230183887296955e-05, "loss": 0.0315, "step": 3573, "task_loss": 0.044650256633758545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809041607829498, "compression_loss": 0.0, "distillation_loss": 0.0887594223022461, "epoch": 3.39, "learning_rate": 2.6219541438077855e-05, "loss": 0.0841, "step": 3574, "task_loss": 0.04259561002254486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809159677730628, "compression_loss": 0.0, "distillation_loss": 0.10874880850315094, "epoch": 3.4, "learning_rate": 2.620889876732317e-05, "loss": 0.1032, "step": 3575, "task_loss": 0.05339481681585312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809277698953176, "compression_loss": 0.0, "distillation_loss": 0.03946895897388458, "epoch": 3.4, "learning_rate": 2.6198255876966204e-05, "loss": 0.0368, "step": 3576, "task_loss": 0.012670749798417091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809395671507181, "compression_loss": 0.0, "distillation_loss": 0.2581176459789276, "epoch": 3.4, "learning_rate": 2.6187612768940293e-05, "loss": 0.2457, "step": 3577, "task_loss": 0.13422563672065735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809513595402676, "compression_loss": 0.0, "distillation_loss": 0.0735367089509964, "epoch": 3.4, "learning_rate": 2.61769694451788e-05, "loss": 0.0734, "step": 3578, "task_loss": 0.07223978638648987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809631470649702, "compression_loss": 0.0, "distillation_loss": 0.07702729851007462, "epoch": 3.4, "learning_rate": 2.616632590761514e-05, "loss": 0.078, "step": 3579, "task_loss": 0.08705680817365646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809749297258293, "compression_loss": 0.0, "distillation_loss": 0.037445612251758575, "epoch": 3.4, "learning_rate": 2.615568215818276e-05, "loss": 0.0375, "step": 3580, "task_loss": 0.03814993426203728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809867075238486, "compression_loss": 0.0, "distillation_loss": 0.04116063937544823, "epoch": 3.4, "learning_rate": 2.6145038198815152e-05, "loss": 0.038, "step": 3581, "task_loss": 0.00981508381664753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7809984804600318, "compression_loss": 0.0, "distillation_loss": 0.09640266001224518, "epoch": 3.4, "learning_rate": 2.6134394031445843e-05, "loss": 0.1068, "step": 3582, "task_loss": 0.20053042471408844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7810102485353828, "compression_loss": 0.0, "distillation_loss": 0.14251397550106049, "epoch": 3.4, "learning_rate": 2.6123749658008383e-05, "loss": 0.1474, "step": 3583, "task_loss": 0.1909736692905426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781022011750905, "compression_loss": 0.0, "distillation_loss": 0.027869436889886856, "epoch": 3.4, "learning_rate": 2.6113105080436396e-05, "loss": 0.0299, "step": 3584, "task_loss": 0.04815257340669632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7810337701076022, "compression_loss": 0.0, "distillation_loss": 0.048661135137081146, "epoch": 3.4, "learning_rate": 2.6102460300663506e-05, "loss": 0.0529, "step": 3585, "task_loss": 0.09094928950071335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781045523606478, "compression_loss": 0.0, "distillation_loss": 0.04565204679965973, "epoch": 3.41, "learning_rate": 2.60918153206234e-05, "loss": 0.0424, "step": 3586, "task_loss": 0.013126576319336891 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7810572722485363, "compression_loss": 0.0, "distillation_loss": 0.10344560444355011, "epoch": 3.41, "learning_rate": 2.6081170142249773e-05, "loss": 0.1083, "step": 3587, "task_loss": 0.1516730636358261 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7810690160347805, "compression_loss": 0.0, "distillation_loss": 0.2309635877609253, "epoch": 3.41, "learning_rate": 2.607052476747639e-05, "loss": 0.222, "step": 3588, "task_loss": 0.14136911928653717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7810807549662145, "compression_loss": 0.0, "distillation_loss": 0.024008475244045258, "epoch": 3.41, "learning_rate": 2.6059879198237026e-05, "loss": 0.0232, "step": 3589, "task_loss": 0.016412295401096344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7810924890438419, "compression_loss": 0.0, "distillation_loss": 0.012282849289476871, "epoch": 3.41, "learning_rate": 2.6049233436465498e-05, "loss": 0.0191, "step": 3590, "task_loss": 0.08071555197238922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811042182686664, "compression_loss": 0.0, "distillation_loss": 0.01852232962846756, "epoch": 3.41, "learning_rate": 2.6038587484095673e-05, "loss": 0.0184, "step": 3591, "task_loss": 0.01706135831773281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811159426416916, "compression_loss": 0.0, "distillation_loss": 0.15857195854187012, "epoch": 3.41, "learning_rate": 2.6027941343061412e-05, "loss": 0.1539, "step": 3592, "task_loss": 0.11174768954515457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811276621639213, "compression_loss": 0.0, "distillation_loss": 0.12345419079065323, "epoch": 3.41, "learning_rate": 2.6017295015296665e-05, "loss": 0.122, "step": 3593, "task_loss": 0.10872073471546173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781139376836359, "compression_loss": 0.0, "distillation_loss": 0.19352051615715027, "epoch": 3.41, "learning_rate": 2.600664850273538e-05, "loss": 0.1846, "step": 3594, "task_loss": 0.10442230850458145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811510866600085, "compression_loss": 0.0, "distillation_loss": 0.058610156178474426, "epoch": 3.41, "learning_rate": 2.599600180731155e-05, "loss": 0.0625, "step": 3595, "task_loss": 0.09727238863706589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811627916358737, "compression_loss": 0.0, "distillation_loss": 0.03841635584831238, "epoch": 3.42, "learning_rate": 2.598535493095919e-05, "loss": 0.0445, "step": 3596, "task_loss": 0.09931030124425888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811744917649579, "compression_loss": 0.0, "distillation_loss": 0.032603487372398376, "epoch": 3.42, "learning_rate": 2.5974707875612357e-05, "loss": 0.0327, "step": 3597, "task_loss": 0.0331152006983757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781186187048265, "compression_loss": 0.0, "distillation_loss": 0.07631140947341919, "epoch": 3.42, "learning_rate": 2.5964060643205153e-05, "loss": 0.0696, "step": 3598, "task_loss": 0.009496444836258888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7811978774867987, "compression_loss": 0.0, "distillation_loss": 0.027815554291009903, "epoch": 3.42, "learning_rate": 2.5953413235671688e-05, "loss": 0.036, "step": 3599, "task_loss": 0.10932107269763947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7812095630815625, "compression_loss": 0.0, "distillation_loss": 0.07741496711969376, "epoch": 3.42, "learning_rate": 2.594276565494611e-05, "loss": 0.0841, "step": 3600, "task_loss": 0.14415237307548523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7812212438335603, "compression_loss": 0.0, "distillation_loss": 0.09485390782356262, "epoch": 3.42, "learning_rate": 2.5932117902962616e-05, "loss": 0.0979, "step": 3601, "task_loss": 0.12560498714447021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7812329197437957, "compression_loss": 0.0, "distillation_loss": 0.17144733667373657, "epoch": 3.42, "learning_rate": 2.5921469981655415e-05, "loss": 0.1648, "step": 3602, "task_loss": 0.10540945827960968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7812445908132722, "compression_loss": 0.0, "distillation_loss": 0.11685089021921158, "epoch": 3.42, "learning_rate": 2.591082189295876e-05, "loss": 0.1339, "step": 3603, "task_loss": 0.28701621294021606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7812562570429938, "compression_loss": 0.0, "distillation_loss": 0.0379994660615921, "epoch": 3.42, "learning_rate": 2.590017363880691e-05, "loss": 0.0415, "step": 3604, "task_loss": 0.07254001498222351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781267918433964, "compression_loss": 0.0, "distillation_loss": 0.07432805001735687, "epoch": 3.42, "learning_rate": 2.5889525221134192e-05, "loss": 0.072, "step": 3605, "task_loss": 0.0511680468916893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7812795749871866, "compression_loss": 0.0, "distillation_loss": 0.11450830101966858, "epoch": 3.42, "learning_rate": 2.5878876641874928e-05, "loss": 0.1202, "step": 3606, "task_loss": 0.1715685874223709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781291226703665, "compression_loss": 0.0, "distillation_loss": 0.0681910514831543, "epoch": 3.43, "learning_rate": 2.5868227902963493e-05, "loss": 0.064, "step": 3607, "task_loss": 0.026141859591007233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813028735844032, "compression_loss": 0.0, "distillation_loss": 0.056682735681533813, "epoch": 3.43, "learning_rate": 2.5857579006334282e-05, "loss": 0.0532, "step": 3608, "task_loss": 0.022059109061956406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813145156304048, "compression_loss": 0.0, "distillation_loss": 0.08323369175195694, "epoch": 3.43, "learning_rate": 2.58469299539217e-05, "loss": 0.0813, "step": 3609, "task_loss": 0.06356573849916458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813261528426734, "compression_loss": 0.0, "distillation_loss": 0.015457273460924625, "epoch": 3.43, "learning_rate": 2.5836280747660225e-05, "loss": 0.0144, "step": 3610, "task_loss": 0.005144596099853516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813377852222126, "compression_loss": 0.0, "distillation_loss": 0.05984696373343468, "epoch": 3.43, "learning_rate": 2.5825631389484323e-05, "loss": 0.0799, "step": 3611, "task_loss": 0.2608661949634552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813494127700263, "compression_loss": 0.0, "distillation_loss": 0.22596868872642517, "epoch": 3.43, "learning_rate": 2.58149818813285e-05, "loss": 0.2201, "step": 3612, "task_loss": 0.16762208938598633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813610354871181, "compression_loss": 0.0, "distillation_loss": 0.09914842247962952, "epoch": 3.43, "learning_rate": 2.5804332225127294e-05, "loss": 0.101, "step": 3613, "task_loss": 0.117338627576828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813726533744917, "compression_loss": 0.0, "distillation_loss": 0.06026465445756912, "epoch": 3.43, "learning_rate": 2.579368242281527e-05, "loss": 0.0678, "step": 3614, "task_loss": 0.13608182966709137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813842664331506, "compression_loss": 0.0, "distillation_loss": 0.16431432962417603, "epoch": 3.43, "learning_rate": 2.5783032476327007e-05, "loss": 0.1575, "step": 3615, "task_loss": 0.09610229730606079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7813958746640988, "compression_loss": 0.0, "distillation_loss": 0.22635792195796967, "epoch": 3.43, "learning_rate": 2.5772382387597128e-05, "loss": 0.2213, "step": 3616, "task_loss": 0.17567506432533264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814074780683398, "compression_loss": 0.0, "distillation_loss": 0.02461588941514492, "epoch": 3.43, "learning_rate": 2.5761732158560263e-05, "loss": 0.023, "step": 3617, "task_loss": 0.008299414068460464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814190766468772, "compression_loss": 0.0, "distillation_loss": 0.06911341845989227, "epoch": 3.44, "learning_rate": 2.5751081791151083e-05, "loss": 0.0656, "step": 3618, "task_loss": 0.03424249589443207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814306704007148, "compression_loss": 0.0, "distillation_loss": 0.21919073164463043, "epoch": 3.44, "learning_rate": 2.574043128730428e-05, "loss": 0.2192, "step": 3619, "task_loss": 0.2192375510931015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814422593308563, "compression_loss": 0.0, "distillation_loss": 0.052816398441791534, "epoch": 3.44, "learning_rate": 2.572978064895457e-05, "loss": 0.0481, "step": 3620, "task_loss": 0.006035482510924339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814538434383054, "compression_loss": 0.0, "distillation_loss": 0.19541379809379578, "epoch": 3.44, "learning_rate": 2.5719129878036686e-05, "loss": 0.1998, "step": 3621, "task_loss": 0.2397608757019043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814654227240656, "compression_loss": 0.0, "distillation_loss": 0.05258458852767944, "epoch": 3.44, "learning_rate": 2.5708478976485402e-05, "loss": 0.0484, "step": 3622, "task_loss": 0.010881522670388222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814769971891408, "compression_loss": 0.0, "distillation_loss": 0.07996995747089386, "epoch": 3.44, "learning_rate": 2.569782794623549e-05, "loss": 0.0778, "step": 3623, "task_loss": 0.058584682643413544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7814885668345345, "compression_loss": 0.0, "distillation_loss": 0.26377159357070923, "epoch": 3.44, "learning_rate": 2.5687176789221784e-05, "loss": 0.2652, "step": 3624, "task_loss": 0.2783206105232239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815001316612505, "compression_loss": 0.0, "distillation_loss": 0.1098802238702774, "epoch": 3.44, "learning_rate": 2.5676525507379097e-05, "loss": 0.1033, "step": 3625, "task_loss": 0.04377390444278717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815116916702924, "compression_loss": 0.0, "distillation_loss": 0.19648560881614685, "epoch": 3.44, "learning_rate": 2.566587410264229e-05, "loss": 0.1934, "step": 3626, "task_loss": 0.16522561013698578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815232468626641, "compression_loss": 0.0, "distillation_loss": 0.09389695525169373, "epoch": 3.44, "learning_rate": 2.565522257694625e-05, "loss": 0.0902, "step": 3627, "task_loss": 0.05693268030881882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781534797239369, "compression_loss": 0.0, "distillation_loss": 0.053470179438591, "epoch": 3.45, "learning_rate": 2.5644570932225874e-05, "loss": 0.0505, "step": 3628, "task_loss": 0.023593464866280556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.781546342801411, "compression_loss": 0.0, "distillation_loss": 0.13491879403591156, "epoch": 3.45, "learning_rate": 2.5633919170416087e-05, "loss": 0.1316, "step": 3629, "task_loss": 0.10193748027086258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815578835497936, "compression_loss": 0.0, "distillation_loss": 0.0393562838435173, "epoch": 3.45, "learning_rate": 2.5623267293451826e-05, "loss": 0.0364, "step": 3630, "task_loss": 0.009963281452655792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815694194855205, "compression_loss": 0.0, "distillation_loss": 0.09000158309936523, "epoch": 3.45, "learning_rate": 2.5612615303268062e-05, "loss": 0.0853, "step": 3631, "task_loss": 0.04277219995856285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815809506095955, "compression_loss": 0.0, "distillation_loss": 0.04296325892210007, "epoch": 3.45, "learning_rate": 2.560196320179977e-05, "loss": 0.0837, "step": 3632, "task_loss": 0.4504657983779907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7815924769230224, "compression_loss": 0.0, "distillation_loss": 0.05088837444782257, "epoch": 3.45, "learning_rate": 2.559131099098197e-05, "loss": 0.0472, "step": 3633, "task_loss": 0.013780592009425163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816039984268045, "compression_loss": 0.0, "distillation_loss": 0.04055938124656677, "epoch": 3.45, "learning_rate": 2.558065867274968e-05, "loss": 0.0381, "step": 3634, "task_loss": 0.015710243955254555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816155151219457, "compression_loss": 0.0, "distillation_loss": 0.21613720059394836, "epoch": 3.45, "learning_rate": 2.5570006249037943e-05, "loss": 0.2207, "step": 3635, "task_loss": 0.261584997177124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816270270094497, "compression_loss": 0.0, "distillation_loss": 0.17852503061294556, "epoch": 3.45, "learning_rate": 2.5559353721781832e-05, "loss": 0.1745, "step": 3636, "task_loss": 0.13826580345630646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816385340903202, "compression_loss": 0.0, "distillation_loss": 0.019594872370362282, "epoch": 3.45, "learning_rate": 2.5548701092916415e-05, "loss": 0.0184, "step": 3637, "task_loss": 0.00783473439514637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816500363655609, "compression_loss": 0.0, "distillation_loss": 0.028669090941548347, "epoch": 3.45, "learning_rate": 2.5538048364376806e-05, "loss": 0.0316, "step": 3638, "task_loss": 0.05839303508400917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816615338361753, "compression_loss": 0.0, "distillation_loss": 0.039027225226163864, "epoch": 3.46, "learning_rate": 2.552739553809812e-05, "loss": 0.0409, "step": 3639, "task_loss": 0.05817575752735138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816730265031673, "compression_loss": 0.0, "distillation_loss": 0.05956938490271568, "epoch": 3.46, "learning_rate": 2.5516742616015493e-05, "loss": 0.0601, "step": 3640, "task_loss": 0.06536316871643066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816845143675404, "compression_loss": 0.0, "distillation_loss": 0.19583433866500854, "epoch": 3.46, "learning_rate": 2.5506089600064086e-05, "loss": 0.2043, "step": 3641, "task_loss": 0.2809743881225586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7816959974302984, "compression_loss": 0.0, "distillation_loss": 0.10645139217376709, "epoch": 3.46, "learning_rate": 2.549543649217906e-05, "loss": 0.1073, "step": 3642, "task_loss": 0.11531829088926315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817074756924449, "compression_loss": 0.0, "distillation_loss": 0.13260212540626526, "epoch": 3.46, "learning_rate": 2.548478329429561e-05, "loss": 0.1288, "step": 3643, "task_loss": 0.09502564370632172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817189491549836, "compression_loss": 0.0, "distillation_loss": 0.03851540386676788, "epoch": 3.46, "learning_rate": 2.5474130008348946e-05, "loss": 0.0381, "step": 3644, "task_loss": 0.034055422991514206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817304178189184, "compression_loss": 0.0, "distillation_loss": 0.03768332302570343, "epoch": 3.46, "learning_rate": 2.5463476636274276e-05, "loss": 0.0514, "step": 3645, "task_loss": 0.17461322247982025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817418816852526, "compression_loss": 0.0, "distillation_loss": 0.19072747230529785, "epoch": 3.46, "learning_rate": 2.5452823180006845e-05, "loss": 0.1848, "step": 3646, "task_loss": 0.13176460564136505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817533407549903, "compression_loss": 0.0, "distillation_loss": 0.06055070832371712, "epoch": 3.46, "learning_rate": 2.5442169641481907e-05, "loss": 0.0671, "step": 3647, "task_loss": 0.1260763555765152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817647950291348, "compression_loss": 0.0, "distillation_loss": 0.03400256484746933, "epoch": 3.46, "learning_rate": 2.5431516022634715e-05, "loss": 0.0456, "step": 3648, "task_loss": 0.1496407389640808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817762445086899, "compression_loss": 0.0, "distillation_loss": 0.021507391706109047, "epoch": 3.47, "learning_rate": 2.5420862325400563e-05, "loss": 0.02, "step": 3649, "task_loss": 0.006038764491677284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817876891946594, "compression_loss": 0.0, "distillation_loss": 0.02982695959508419, "epoch": 3.47, "learning_rate": 2.5410208551714742e-05, "loss": 0.0378, "step": 3650, "task_loss": 0.10967092961072922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7817991290880468, "compression_loss": 0.0, "distillation_loss": 0.08754716068506241, "epoch": 3.47, "learning_rate": 2.539955470351257e-05, "loss": 0.0896, "step": 3651, "task_loss": 0.1080557107925415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818105641898561, "compression_loss": 0.0, "distillation_loss": 0.03534093499183655, "epoch": 3.47, "learning_rate": 2.5388900782729347e-05, "loss": 0.0326, "step": 3652, "task_loss": 0.007803870365023613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818219945010906, "compression_loss": 0.0, "distillation_loss": 0.07137121260166168, "epoch": 3.47, "learning_rate": 2.5378246791300435e-05, "loss": 0.077, "step": 3653, "task_loss": 0.12779828906059265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818334200227542, "compression_loss": 0.0, "distillation_loss": 0.2230396568775177, "epoch": 3.47, "learning_rate": 2.536759273116117e-05, "loss": 0.2194, "step": 3654, "task_loss": 0.18703873455524445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818448407558506, "compression_loss": 0.0, "distillation_loss": 0.13464270532131195, "epoch": 3.47, "learning_rate": 2.5356938604246916e-05, "loss": 0.1454, "step": 3655, "task_loss": 0.24241343140602112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818562567013833, "compression_loss": 0.0, "distillation_loss": 0.03593993932008743, "epoch": 3.47, "learning_rate": 2.534628441249305e-05, "loss": 0.0394, "step": 3656, "task_loss": 0.07036207616329193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818676678603561, "compression_loss": 0.0, "distillation_loss": 0.13949531316757202, "epoch": 3.47, "learning_rate": 2.5335630157834937e-05, "loss": 0.1425, "step": 3657, "task_loss": 0.16993454098701477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818790742337728, "compression_loss": 0.0, "distillation_loss": 0.08820469677448273, "epoch": 3.47, "learning_rate": 2.5324975842208004e-05, "loss": 0.0803, "step": 3658, "task_loss": 0.008724292740225792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7818904758226368, "compression_loss": 0.0, "distillation_loss": 0.12831662595272064, "epoch": 3.47, "learning_rate": 2.5314321467547635e-05, "loss": 0.1308, "step": 3659, "task_loss": 0.15305723249912262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819018726279521, "compression_loss": 0.0, "distillation_loss": 0.02787201665341854, "epoch": 3.48, "learning_rate": 2.5303667035789262e-05, "loss": 0.0258, "step": 3660, "task_loss": 0.007001947611570358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819132646507222, "compression_loss": 0.0, "distillation_loss": 0.02681071124970913, "epoch": 3.48, "learning_rate": 2.5293012548868306e-05, "loss": 0.0394, "step": 3661, "task_loss": 0.15266357362270355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819246518919507, "compression_loss": 0.0, "distillation_loss": 0.07564039528369904, "epoch": 3.48, "learning_rate": 2.5282358008720213e-05, "loss": 0.0799, "step": 3662, "task_loss": 0.11861392855644226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819360343526415, "compression_loss": 0.0, "distillation_loss": 0.04754069447517395, "epoch": 3.48, "learning_rate": 2.5271703417280433e-05, "loss": 0.0612, "step": 3663, "task_loss": 0.1845139116048813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819474120337981, "compression_loss": 0.0, "distillation_loss": 0.01500766258686781, "epoch": 3.48, "learning_rate": 2.526104877648441e-05, "loss": 0.0139, "step": 3664, "task_loss": 0.00399116612970829 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819587849364243, "compression_loss": 0.0, "distillation_loss": 0.06251572072505951, "epoch": 3.48, "learning_rate": 2.525039408826762e-05, "loss": 0.065, "step": 3665, "task_loss": 0.08762296289205551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819701530615237, "compression_loss": 0.0, "distillation_loss": 0.1165790855884552, "epoch": 3.48, "learning_rate": 2.523973935456554e-05, "loss": 0.1153, "step": 3666, "task_loss": 0.10344909876585007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819815164101, "compression_loss": 0.0, "distillation_loss": 0.09375932067632675, "epoch": 3.48, "learning_rate": 2.522908457731366e-05, "loss": 0.0982, "step": 3667, "task_loss": 0.138652503490448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7819928749831571, "compression_loss": 0.0, "distillation_loss": 0.09869857132434845, "epoch": 3.48, "learning_rate": 2.5218429758447455e-05, "loss": 0.0972, "step": 3668, "task_loss": 0.08332294970750809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820042287816983, "compression_loss": 0.0, "distillation_loss": 0.07920647412538528, "epoch": 3.48, "learning_rate": 2.520777489990243e-05, "loss": 0.0822, "step": 3669, "task_loss": 0.10958139598369598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820155778067275, "compression_loss": 0.0, "distillation_loss": 0.04178139567375183, "epoch": 3.49, "learning_rate": 2.5197120003614094e-05, "loss": 0.0431, "step": 3670, "task_loss": 0.054654479026794434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820269220592484, "compression_loss": 0.0, "distillation_loss": 0.08496996760368347, "epoch": 3.49, "learning_rate": 2.518646507151796e-05, "loss": 0.0805, "step": 3671, "task_loss": 0.04056711122393608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820382615402646, "compression_loss": 0.0, "distillation_loss": 0.07371478527784348, "epoch": 3.49, "learning_rate": 2.517581010554956e-05, "loss": 0.0778, "step": 3672, "task_loss": 0.11423461884260178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820495962507797, "compression_loss": 0.0, "distillation_loss": 0.04122939705848694, "epoch": 3.49, "learning_rate": 2.5165155107644394e-05, "loss": 0.0386, "step": 3673, "task_loss": 0.014970516785979271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820609261917976, "compression_loss": 0.0, "distillation_loss": 0.18131166696548462, "epoch": 3.49, "learning_rate": 2.515450007973801e-05, "loss": 0.1739, "step": 3674, "task_loss": 0.10724371671676636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820722513643219, "compression_loss": 0.0, "distillation_loss": 0.11633885651826859, "epoch": 3.49, "learning_rate": 2.5143845023765943e-05, "loss": 0.1146, "step": 3675, "task_loss": 0.09903288632631302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820835717693562, "compression_loss": 0.0, "distillation_loss": 0.15314285457134247, "epoch": 3.49, "learning_rate": 2.513318994166373e-05, "loss": 0.1471, "step": 3676, "task_loss": 0.09299644827842712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7820948874079042, "compression_loss": 0.0, "distillation_loss": 0.05131184682250023, "epoch": 3.49, "learning_rate": 2.5122534835366934e-05, "loss": 0.0473, "step": 3677, "task_loss": 0.011473558843135834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821061982809697, "compression_loss": 0.0, "distillation_loss": 0.23348447680473328, "epoch": 3.49, "learning_rate": 2.5111879706811087e-05, "loss": 0.226, "step": 3678, "task_loss": 0.15873844921588898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821175043895563, "compression_loss": 0.0, "distillation_loss": 0.050928764045238495, "epoch": 3.49, "learning_rate": 2.5101224557931758e-05, "loss": 0.0542, "step": 3679, "task_loss": 0.08366743475198746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821288057346677, "compression_loss": 0.0, "distillation_loss": 0.1100388914346695, "epoch": 3.49, "learning_rate": 2.5090569390664492e-05, "loss": 0.1145, "step": 3680, "task_loss": 0.15434427559375763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821401023173075, "compression_loss": 0.0, "distillation_loss": 0.13357719779014587, "epoch": 3.5, "learning_rate": 2.5079914206944866e-05, "loss": 0.1348, "step": 3681, "task_loss": 0.14616169035434723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821513941384794, "compression_loss": 0.0, "distillation_loss": 0.10456112027168274, "epoch": 3.5, "learning_rate": 2.5069259008708446e-05, "loss": 0.1072, "step": 3682, "task_loss": 0.1304989755153656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821626811991873, "compression_loss": 0.0, "distillation_loss": 0.028667902573943138, "epoch": 3.5, "learning_rate": 2.5058603797890778e-05, "loss": 0.0353, "step": 3683, "task_loss": 0.094968281686306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821739635004347, "compression_loss": 0.0, "distillation_loss": 0.11443310976028442, "epoch": 3.5, "learning_rate": 2.504794857642746e-05, "loss": 0.1115, "step": 3684, "task_loss": 0.08492926508188248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821852410432252, "compression_loss": 0.0, "distillation_loss": 0.04449796676635742, "epoch": 3.5, "learning_rate": 2.5037293346254044e-05, "loss": 0.0453, "step": 3685, "task_loss": 0.05215233191847801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7821965138285626, "compression_loss": 0.0, "distillation_loss": 0.08054036647081375, "epoch": 3.5, "learning_rate": 2.5026638109306118e-05, "loss": 0.0923, "step": 3686, "task_loss": 0.1976814568042755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822077818574505, "compression_loss": 0.0, "distillation_loss": 0.03564410284161568, "epoch": 3.5, "learning_rate": 2.5015982867519245e-05, "loss": 0.0327, "step": 3687, "task_loss": 0.006579475477337837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822190451308928, "compression_loss": 0.0, "distillation_loss": 0.032507769763469696, "epoch": 3.5, "learning_rate": 2.500532762282901e-05, "loss": 0.0497, "step": 3688, "task_loss": 0.20428074896335602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822303036498929, "compression_loss": 0.0, "distillation_loss": 0.03502486273646355, "epoch": 3.5, "learning_rate": 2.4994672377170988e-05, "loss": 0.0328, "step": 3689, "task_loss": 0.013175180181860924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822415574154546, "compression_loss": 0.0, "distillation_loss": 0.05492643266916275, "epoch": 3.5, "learning_rate": 2.4984017132480754e-05, "loss": 0.0587, "step": 3690, "task_loss": 0.0923444926738739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822528064285817, "compression_loss": 0.0, "distillation_loss": 0.022486487403512, "epoch": 3.51, "learning_rate": 2.4973361890693888e-05, "loss": 0.021, "step": 3691, "task_loss": 0.0075523629784584045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822640506902777, "compression_loss": 0.0, "distillation_loss": 0.11572916805744171, "epoch": 3.51, "learning_rate": 2.496270665374596e-05, "loss": 0.1192, "step": 3692, "task_loss": 0.15077991783618927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822752902015464, "compression_loss": 0.0, "distillation_loss": 0.08084140717983246, "epoch": 3.51, "learning_rate": 2.4952051423572548e-05, "loss": 0.0787, "step": 3693, "task_loss": 0.058929864317178726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822865249633913, "compression_loss": 0.0, "distillation_loss": 0.06542964279651642, "epoch": 3.51, "learning_rate": 2.494139620210923e-05, "loss": 0.0636, "step": 3694, "task_loss": 0.04673419147729874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7822977549768164, "compression_loss": 0.0, "distillation_loss": 0.0398731529712677, "epoch": 3.51, "learning_rate": 2.4930740991291567e-05, "loss": 0.0375, "step": 3695, "task_loss": 0.016480809077620506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823089802428251, "compression_loss": 0.0, "distillation_loss": 0.02609540894627571, "epoch": 3.51, "learning_rate": 2.492008579305514e-05, "loss": 0.032, "step": 3696, "task_loss": 0.08469408750534058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823202007624213, "compression_loss": 0.0, "distillation_loss": 0.07933405041694641, "epoch": 3.51, "learning_rate": 2.4909430609335517e-05, "loss": 0.0922, "step": 3697, "task_loss": 0.2080336958169937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823314165366084, "compression_loss": 0.0, "distillation_loss": 0.08961853384971619, "epoch": 3.51, "learning_rate": 2.4898775442068248e-05, "loss": 0.0827, "step": 3698, "task_loss": 0.020808879286050797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823426275663903, "compression_loss": 0.0, "distillation_loss": 0.028536148369312286, "epoch": 3.51, "learning_rate": 2.4888120293188916e-05, "loss": 0.0301, "step": 3699, "task_loss": 0.04397343844175339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823538338527707, "compression_loss": 0.0, "distillation_loss": 0.030012015253305435, "epoch": 3.51, "learning_rate": 2.487746516463307e-05, "loss": 0.0367, "step": 3700, "task_loss": 0.096799835562706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823650353967531, "compression_loss": 0.0, "distillation_loss": 0.03244200348854065, "epoch": 3.51, "learning_rate": 2.486681005833627e-05, "loss": 0.0298, "step": 3701, "task_loss": 0.005791664123535156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823762321993413, "compression_loss": 0.0, "distillation_loss": 0.19793766736984253, "epoch": 3.52, "learning_rate": 2.4856154976234063e-05, "loss": 0.1875, "step": 3702, "task_loss": 0.09377109259366989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7823874242615391, "compression_loss": 0.0, "distillation_loss": 0.15221963822841644, "epoch": 3.52, "learning_rate": 2.4845499920261993e-05, "loss": 0.1428, "step": 3703, "task_loss": 0.05772307515144348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78239861158435, "compression_loss": 0.0, "distillation_loss": 0.031839657574892044, "epoch": 3.52, "learning_rate": 2.4834844892355615e-05, "loss": 0.0292, "step": 3704, "task_loss": 0.005376823246479034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824097941687777, "compression_loss": 0.0, "distillation_loss": 0.03910262882709503, "epoch": 3.52, "learning_rate": 2.4824189894450453e-05, "loss": 0.0486, "step": 3705, "task_loss": 0.1344204545021057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824209720158258, "compression_loss": 0.0, "distillation_loss": 0.14387640357017517, "epoch": 3.52, "learning_rate": 2.4813534928482038e-05, "loss": 0.1372, "step": 3706, "task_loss": 0.07675487548112869 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824321451264983, "compression_loss": 0.0, "distillation_loss": 0.05041220039129257, "epoch": 3.52, "learning_rate": 2.4802879996385905e-05, "loss": 0.0566, "step": 3707, "task_loss": 0.11191262304782867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824433135017986, "compression_loss": 0.0, "distillation_loss": 0.1696065366268158, "epoch": 3.52, "learning_rate": 2.4792225100097578e-05, "loss": 0.161, "step": 3708, "task_loss": 0.08338792622089386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824544771427305, "compression_loss": 0.0, "distillation_loss": 0.035738278180360794, "epoch": 3.52, "learning_rate": 2.4781570241552554e-05, "loss": 0.0337, "step": 3709, "task_loss": 0.015493502840399742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824656360502976, "compression_loss": 0.0, "distillation_loss": 0.18329283595085144, "epoch": 3.52, "learning_rate": 2.477091542268635e-05, "loss": 0.184, "step": 3710, "task_loss": 0.18989968299865723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824767902255038, "compression_loss": 0.0, "distillation_loss": 0.016138188540935516, "epoch": 3.52, "learning_rate": 2.4760260645434462e-05, "loss": 0.0148, "step": 3711, "task_loss": 0.0031305551528930664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824879396693524, "compression_loss": 0.0, "distillation_loss": 0.18617260456085205, "epoch": 3.53, "learning_rate": 2.474960591173238e-05, "loss": 0.1785, "step": 3712, "task_loss": 0.10942824184894562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7824990843828474, "compression_loss": 0.0, "distillation_loss": 0.0715465247631073, "epoch": 3.53, "learning_rate": 2.47389512235156e-05, "loss": 0.0795, "step": 3713, "task_loss": 0.15127640962600708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825102243669924, "compression_loss": 0.0, "distillation_loss": 0.04901812970638275, "epoch": 3.53, "learning_rate": 2.472829658271958e-05, "loss": 0.0451, "step": 3714, "task_loss": 0.01026928424835205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825213596227909, "compression_loss": 0.0, "distillation_loss": 0.03361319750547409, "epoch": 3.53, "learning_rate": 2.4717641991279786e-05, "loss": 0.031, "step": 3715, "task_loss": 0.007931182160973549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825324901512469, "compression_loss": 0.0, "distillation_loss": 0.01693807728588581, "epoch": 3.53, "learning_rate": 2.4706987451131693e-05, "loss": 0.0202, "step": 3716, "task_loss": 0.04959682375192642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825436159533639, "compression_loss": 0.0, "distillation_loss": 0.04695942997932434, "epoch": 3.53, "learning_rate": 2.4696332964210743e-05, "loss": 0.0442, "step": 3717, "task_loss": 0.01931283064186573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825547370301456, "compression_loss": 0.0, "distillation_loss": 0.04885145276784897, "epoch": 3.53, "learning_rate": 2.468567853245237e-05, "loss": 0.0489, "step": 3718, "task_loss": 0.049653246998786926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825658533825957, "compression_loss": 0.0, "distillation_loss": 0.03596015274524689, "epoch": 3.53, "learning_rate": 2.4675024157792005e-05, "loss": 0.0335, "step": 3719, "task_loss": 0.011791346594691277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825769650117178, "compression_loss": 0.0, "distillation_loss": 0.08147747069597244, "epoch": 3.53, "learning_rate": 2.4664369842165068e-05, "loss": 0.0833, "step": 3720, "task_loss": 0.0997631698846817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7825880719185156, "compression_loss": 0.0, "distillation_loss": 0.0436641164124012, "epoch": 3.53, "learning_rate": 2.4653715587506963e-05, "loss": 0.0399, "step": 3721, "task_loss": 0.006380394101142883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.782599174103993, "compression_loss": 0.0, "distillation_loss": 0.0826558843255043, "epoch": 3.53, "learning_rate": 2.4643061395753093e-05, "loss": 0.0923, "step": 3722, "task_loss": 0.178600013256073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826102715691534, "compression_loss": 0.0, "distillation_loss": 0.07361429929733276, "epoch": 3.54, "learning_rate": 2.463240726883884e-05, "loss": 0.0684, "step": 3723, "task_loss": 0.021939026191830635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826213643150007, "compression_loss": 0.0, "distillation_loss": 0.08207493275403976, "epoch": 3.54, "learning_rate": 2.4621753208699567e-05, "loss": 0.0901, "step": 3724, "task_loss": 0.16210611164569855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826324523425384, "compression_loss": 0.0, "distillation_loss": 0.058608826249837875, "epoch": 3.54, "learning_rate": 2.4611099217270652e-05, "loss": 0.0535, "step": 3725, "task_loss": 0.007720122113823891 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826435356527703, "compression_loss": 0.0, "distillation_loss": 0.03315757215023041, "epoch": 3.54, "learning_rate": 2.4600445296487436e-05, "loss": 0.0306, "step": 3726, "task_loss": 0.007330842316150665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826546142467001, "compression_loss": 0.0, "distillation_loss": 0.12233985215425491, "epoch": 3.54, "learning_rate": 2.4589791448285264e-05, "loss": 0.1266, "step": 3727, "task_loss": 0.16475452482700348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826656881253313, "compression_loss": 0.0, "distillation_loss": 0.05593999847769737, "epoch": 3.54, "learning_rate": 2.4579137674599443e-05, "loss": 0.0616, "step": 3728, "task_loss": 0.11271567642688751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826767572896678, "compression_loss": 0.0, "distillation_loss": 0.12087702751159668, "epoch": 3.54, "learning_rate": 2.456848397736529e-05, "loss": 0.1295, "step": 3729, "task_loss": 0.20751793682575226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826878217407132, "compression_loss": 0.0, "distillation_loss": 0.060698773711919785, "epoch": 3.54, "learning_rate": 2.455783035851811e-05, "loss": 0.0597, "step": 3730, "task_loss": 0.05119254067540169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7826988814794711, "compression_loss": 0.0, "distillation_loss": 0.07374706864356995, "epoch": 3.54, "learning_rate": 2.4547176819993164e-05, "loss": 0.082, "step": 3731, "task_loss": 0.15632230043411255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827099365069453, "compression_loss": 0.0, "distillation_loss": 0.07239855825901031, "epoch": 3.54, "learning_rate": 2.4536523363725727e-05, "loss": 0.071, "step": 3732, "task_loss": 0.058301545679569244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827209868241394, "compression_loss": 0.0, "distillation_loss": 0.02140430361032486, "epoch": 3.55, "learning_rate": 2.4525869991651057e-05, "loss": 0.02, "step": 3733, "task_loss": 0.006873439997434616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827320324320571, "compression_loss": 0.0, "distillation_loss": 0.020102519541978836, "epoch": 3.55, "learning_rate": 2.4515216705704395e-05, "loss": 0.0195, "step": 3734, "task_loss": 0.014333127066493034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827430733317022, "compression_loss": 0.0, "distillation_loss": 0.055659808218479156, "epoch": 3.55, "learning_rate": 2.4504563507820942e-05, "loss": 0.0636, "step": 3735, "task_loss": 0.1351262629032135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827541095240782, "compression_loss": 0.0, "distillation_loss": 0.04852107912302017, "epoch": 3.55, "learning_rate": 2.449391039993592e-05, "loss": 0.0525, "step": 3736, "task_loss": 0.08783454447984695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827651410101889, "compression_loss": 0.0, "distillation_loss": 0.029902100563049316, "epoch": 3.55, "learning_rate": 2.4483257383984513e-05, "loss": 0.0296, "step": 3737, "task_loss": 0.026881104335188866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.782776167791038, "compression_loss": 0.0, "distillation_loss": 0.11988551914691925, "epoch": 3.55, "learning_rate": 2.4472604461901887e-05, "loss": 0.1168, "step": 3738, "task_loss": 0.08916487544775009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827871898676291, "compression_loss": 0.0, "distillation_loss": 0.09784644842147827, "epoch": 3.55, "learning_rate": 2.44619516356232e-05, "loss": 0.0946, "step": 3739, "task_loss": 0.06569681316614151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7827982072409658, "compression_loss": 0.0, "distillation_loss": 0.06321494281291962, "epoch": 3.55, "learning_rate": 2.4451298907083597e-05, "loss": 0.0693, "step": 3740, "task_loss": 0.1242145225405693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828092199120521, "compression_loss": 0.0, "distillation_loss": 0.028960630297660828, "epoch": 3.55, "learning_rate": 2.4440646278218177e-05, "loss": 0.0275, "step": 3741, "task_loss": 0.013882100582122803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828202278818913, "compression_loss": 0.0, "distillation_loss": 0.04928376153111458, "epoch": 3.55, "learning_rate": 2.442999375096206e-05, "loss": 0.0484, "step": 3742, "task_loss": 0.040039315819740295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828312311514873, "compression_loss": 0.0, "distillation_loss": 0.08106836676597595, "epoch": 3.55, "learning_rate": 2.4419341327250323e-05, "loss": 0.0827, "step": 3743, "task_loss": 0.09725627303123474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828422297218438, "compression_loss": 0.0, "distillation_loss": 0.26687347888946533, "epoch": 3.56, "learning_rate": 2.4408689009018037e-05, "loss": 0.2664, "step": 3744, "task_loss": 0.26216715574264526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828532235939644, "compression_loss": 0.0, "distillation_loss": 0.061114098876714706, "epoch": 3.56, "learning_rate": 2.4398036798200235e-05, "loss": 0.0888, "step": 3745, "task_loss": 0.3378845453262329 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828642127688528, "compression_loss": 0.0, "distillation_loss": 0.10098116099834442, "epoch": 3.56, "learning_rate": 2.4387384696731947e-05, "loss": 0.1014, "step": 3746, "task_loss": 0.10469695925712585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828751972475126, "compression_loss": 0.0, "distillation_loss": 0.17749673128128052, "epoch": 3.56, "learning_rate": 2.4376732706548183e-05, "loss": 0.187, "step": 3747, "task_loss": 0.2721644937992096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828861770309476, "compression_loss": 0.0, "distillation_loss": 0.0985332578420639, "epoch": 3.56, "learning_rate": 2.4366080829583922e-05, "loss": 0.1061, "step": 3748, "task_loss": 0.17437314987182617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7828971521201615, "compression_loss": 0.0, "distillation_loss": 0.06623338907957077, "epoch": 3.56, "learning_rate": 2.4355429067774135e-05, "loss": 0.0673, "step": 3749, "task_loss": 0.07646562159061432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829081225161579, "compression_loss": 0.0, "distillation_loss": 0.05294762924313545, "epoch": 3.56, "learning_rate": 2.4344777423053748e-05, "loss": 0.0511, "step": 3750, "task_loss": 0.03410526365041733 }, { "epoch": 3.56, "eval_accuracy": 0.8853211009174312, "eval_loss": 0.439721018075943, "eval_runtime": 18.0919, "eval_samples_per_second": 48.198, "eval_steps_per_second": 6.025, "step": 3750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829190882199406, "compression_loss": 0.0, "distillation_loss": 0.035928741097450256, "epoch": 3.56, "learning_rate": 2.433412589735771e-05, "loss": 0.033, "step": 3751, "task_loss": 0.0063820406794548035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829300492325132, "compression_loss": 0.0, "distillation_loss": 0.04816991835832596, "epoch": 3.56, "learning_rate": 2.4323474492620905e-05, "loss": 0.0522, "step": 3752, "task_loss": 0.08798433095216751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829410055548792, "compression_loss": 0.0, "distillation_loss": 0.05488145351409912, "epoch": 3.56, "learning_rate": 2.431282321077822e-05, "loss": 0.0528, "step": 3753, "task_loss": 0.033642224967479706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829519571880427, "compression_loss": 0.0, "distillation_loss": 0.04462762922048569, "epoch": 3.57, "learning_rate": 2.4302172053764514e-05, "loss": 0.0415, "step": 3754, "task_loss": 0.012937184423208237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.782962904133007, "compression_loss": 0.0, "distillation_loss": 0.13158389925956726, "epoch": 3.57, "learning_rate": 2.4291521023514604e-05, "loss": 0.1315, "step": 3755, "task_loss": 0.13095977902412415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829738463907759, "compression_loss": 0.0, "distillation_loss": 0.12265504896640778, "epoch": 3.57, "learning_rate": 2.4280870121963323e-05, "loss": 0.118, "step": 3756, "task_loss": 0.0760970339179039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829847839623532, "compression_loss": 0.0, "distillation_loss": 0.03240625560283661, "epoch": 3.57, "learning_rate": 2.4270219351045438e-05, "loss": 0.0415, "step": 3757, "task_loss": 0.12327875196933746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7829957168487424, "compression_loss": 0.0, "distillation_loss": 0.033418092876672745, "epoch": 3.57, "learning_rate": 2.425956871269572e-05, "loss": 0.0388, "step": 3758, "task_loss": 0.08721967041492462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830066450509473, "compression_loss": 0.0, "distillation_loss": 0.038820862770080566, "epoch": 3.57, "learning_rate": 2.4248918208848916e-05, "loss": 0.0417, "step": 3759, "task_loss": 0.06801855564117432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830175685699716, "compression_loss": 0.0, "distillation_loss": 0.06290020793676376, "epoch": 3.57, "learning_rate": 2.423826784143974e-05, "loss": 0.0586, "step": 3760, "task_loss": 0.019695976749062538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830284874068189, "compression_loss": 0.0, "distillation_loss": 0.04252585023641586, "epoch": 3.57, "learning_rate": 2.422761761240288e-05, "loss": 0.0478, "step": 3761, "task_loss": 0.09514269232749939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783039401562493, "compression_loss": 0.0, "distillation_loss": 0.22789345681667328, "epoch": 3.57, "learning_rate": 2.4216967523673e-05, "loss": 0.22, "step": 3762, "task_loss": 0.14860832691192627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830503110379974, "compression_loss": 0.0, "distillation_loss": 0.13879983127117157, "epoch": 3.57, "learning_rate": 2.420631757718474e-05, "loss": 0.1326, "step": 3763, "task_loss": 0.07688990235328674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830612158343359, "compression_loss": 0.0, "distillation_loss": 0.14175349473953247, "epoch": 3.57, "learning_rate": 2.419566777487271e-05, "loss": 0.138, "step": 3764, "task_loss": 0.1046019047498703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830721159525121, "compression_loss": 0.0, "distillation_loss": 0.02354617603123188, "epoch": 3.58, "learning_rate": 2.4185018118671504e-05, "loss": 0.0316, "step": 3765, "task_loss": 0.1045549288392067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830830113935299, "compression_loss": 0.0, "distillation_loss": 0.23610442876815796, "epoch": 3.58, "learning_rate": 2.417436861051569e-05, "loss": 0.2247, "step": 3766, "task_loss": 0.12234364449977875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7830939021583927, "compression_loss": 0.0, "distillation_loss": 0.08728310465812683, "epoch": 3.58, "learning_rate": 2.4163719252339774e-05, "loss": 0.0861, "step": 3767, "task_loss": 0.0753721296787262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831047882481044, "compression_loss": 0.0, "distillation_loss": 0.04650529474020004, "epoch": 3.58, "learning_rate": 2.41530700460783e-05, "loss": 0.0705, "step": 3768, "task_loss": 0.2862272262573242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831156696636685, "compression_loss": 0.0, "distillation_loss": 0.03932388499379158, "epoch": 3.58, "learning_rate": 2.4142420993665727e-05, "loss": 0.0426, "step": 3769, "task_loss": 0.072311170399189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831265464060888, "compression_loss": 0.0, "distillation_loss": 0.09386499226093292, "epoch": 3.58, "learning_rate": 2.4131772097036516e-05, "loss": 0.0865, "step": 3770, "task_loss": 0.0198683962225914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783137418476369, "compression_loss": 0.0, "distillation_loss": 0.09301801770925522, "epoch": 3.58, "learning_rate": 2.4121123358125078e-05, "loss": 0.1046, "step": 3771, "task_loss": 0.20893818140029907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831482858755127, "compression_loss": 0.0, "distillation_loss": 0.15907953679561615, "epoch": 3.58, "learning_rate": 2.4110474778865817e-05, "loss": 0.1651, "step": 3772, "task_loss": 0.21910780668258667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831591486045236, "compression_loss": 0.0, "distillation_loss": 0.050729766488075256, "epoch": 3.58, "learning_rate": 2.4099826361193098e-05, "loss": 0.0566, "step": 3773, "task_loss": 0.10932128876447678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831700066644055, "compression_loss": 0.0, "distillation_loss": 0.0897529125213623, "epoch": 3.58, "learning_rate": 2.4089178107041254e-05, "loss": 0.0848, "step": 3774, "task_loss": 0.039870765060186386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831808600561619, "compression_loss": 0.0, "distillation_loss": 0.1213412880897522, "epoch": 3.58, "learning_rate": 2.407853001834459e-05, "loss": 0.1197, "step": 3775, "task_loss": 0.10536501556634903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7831917087807966, "compression_loss": 0.0, "distillation_loss": 0.038229409605264664, "epoch": 3.59, "learning_rate": 2.4067882097037383e-05, "loss": 0.0357, "step": 3776, "task_loss": 0.012635331600904465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832025528393133, "compression_loss": 0.0, "distillation_loss": 0.2114480882883072, "epoch": 3.59, "learning_rate": 2.4057234345053894e-05, "loss": 0.204, "step": 3777, "task_loss": 0.13707035779953003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832133922327156, "compression_loss": 0.0, "distillation_loss": 0.03434763103723526, "epoch": 3.59, "learning_rate": 2.404658676432832e-05, "loss": 0.0492, "step": 3778, "task_loss": 0.18263056874275208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832242269620071, "compression_loss": 0.0, "distillation_loss": 0.024286724627017975, "epoch": 3.59, "learning_rate": 2.403593935679485e-05, "loss": 0.0226, "step": 3779, "task_loss": 0.0070180464535951614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832350570281917, "compression_loss": 0.0, "distillation_loss": 0.05927498638629913, "epoch": 3.59, "learning_rate": 2.402529212438765e-05, "loss": 0.0679, "step": 3780, "task_loss": 0.1452222764492035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783245882432273, "compression_loss": 0.0, "distillation_loss": 0.03792211413383484, "epoch": 3.59, "learning_rate": 2.401464506904082e-05, "loss": 0.0416, "step": 3781, "task_loss": 0.07421617209911346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832567031752546, "compression_loss": 0.0, "distillation_loss": 0.1570988893508911, "epoch": 3.59, "learning_rate": 2.400399819268846e-05, "loss": 0.1591, "step": 3782, "task_loss": 0.1775505691766739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832675192581403, "compression_loss": 0.0, "distillation_loss": 0.019645625725388527, "epoch": 3.59, "learning_rate": 2.399335149726463e-05, "loss": 0.0182, "step": 3783, "task_loss": 0.0054306890815496445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832783306819336, "compression_loss": 0.0, "distillation_loss": 0.0738186463713646, "epoch": 3.59, "learning_rate": 2.3982704984703337e-05, "loss": 0.068, "step": 3784, "task_loss": 0.015383878722786903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832891374476385, "compression_loss": 0.0, "distillation_loss": 0.02696252055466175, "epoch": 3.59, "learning_rate": 2.3972058656938587e-05, "loss": 0.0331, "step": 3785, "task_loss": 0.08802430331707001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7832999395562583, "compression_loss": 0.0, "distillation_loss": 0.05371105670928955, "epoch": 3.6, "learning_rate": 2.3961412515904336e-05, "loss": 0.0574, "step": 3786, "task_loss": 0.09069317579269409 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783310737008797, "compression_loss": 0.0, "distillation_loss": 0.0783916711807251, "epoch": 3.6, "learning_rate": 2.3950766563534508e-05, "loss": 0.0778, "step": 3787, "task_loss": 0.07237330079078674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833215298062581, "compression_loss": 0.0, "distillation_loss": 0.18467172980308533, "epoch": 3.6, "learning_rate": 2.394012080176298e-05, "loss": 0.1865, "step": 3788, "task_loss": 0.20330506563186646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833323179496454, "compression_loss": 0.0, "distillation_loss": 0.014792121946811676, "epoch": 3.6, "learning_rate": 2.3929475232523612e-05, "loss": 0.0138, "step": 3789, "task_loss": 0.004657162353396416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833431014399624, "compression_loss": 0.0, "distillation_loss": 0.023432932794094086, "epoch": 3.6, "learning_rate": 2.3918829857750233e-05, "loss": 0.0217, "step": 3790, "task_loss": 0.0064360033720731735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783353880278213, "compression_loss": 0.0, "distillation_loss": 0.023829486221075058, "epoch": 3.6, "learning_rate": 2.3908184679376608e-05, "loss": 0.0219, "step": 3791, "task_loss": 0.004044756293296814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833646544654007, "compression_loss": 0.0, "distillation_loss": 0.10334857553243637, "epoch": 3.6, "learning_rate": 2.3897539699336503e-05, "loss": 0.1155, "step": 3792, "task_loss": 0.22499464452266693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833754240025294, "compression_loss": 0.0, "distillation_loss": 0.020630966871976852, "epoch": 3.6, "learning_rate": 2.3886894919563603e-05, "loss": 0.0257, "step": 3793, "task_loss": 0.07151245325803757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833861888906025, "compression_loss": 0.0, "distillation_loss": 0.09565474092960358, "epoch": 3.6, "learning_rate": 2.387625034199162e-05, "loss": 0.0906, "step": 3794, "task_loss": 0.044928256422281265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7833969491306239, "compression_loss": 0.0, "distillation_loss": 0.061953071504831314, "epoch": 3.6, "learning_rate": 2.3865605968554163e-05, "loss": 0.0614, "step": 3795, "task_loss": 0.056104809045791626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7834077047235971, "compression_loss": 0.0, "distillation_loss": 0.050460297614336014, "epoch": 3.6, "learning_rate": 2.385496180118485e-05, "loss": 0.0469, "step": 3796, "task_loss": 0.014742163941264153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783418455670526, "compression_loss": 0.0, "distillation_loss": 0.07291211187839508, "epoch": 3.61, "learning_rate": 2.3844317841817245e-05, "loss": 0.0822, "step": 3797, "task_loss": 0.1659778207540512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7834292019724142, "compression_loss": 0.0, "distillation_loss": 0.21132490038871765, "epoch": 3.61, "learning_rate": 2.3833674092384864e-05, "loss": 0.2126, "step": 3798, "task_loss": 0.22443267703056335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7834399436302653, "compression_loss": 0.0, "distillation_loss": 0.11052772402763367, "epoch": 3.61, "learning_rate": 2.3823030554821208e-05, "loss": 0.1048, "step": 3799, "task_loss": 0.053448185324668884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783450680645083, "compression_loss": 0.0, "distillation_loss": 0.02982490137219429, "epoch": 3.61, "learning_rate": 2.3812387231059712e-05, "loss": 0.028, "step": 3800, "task_loss": 0.011723890900611877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7834614130178711, "compression_loss": 0.0, "distillation_loss": 0.03554895892739296, "epoch": 3.61, "learning_rate": 2.38017441230338e-05, "loss": 0.0389, "step": 3801, "task_loss": 0.0686713308095932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7834721407496331, "compression_loss": 0.0, "distillation_loss": 0.03221399337053299, "epoch": 3.61, "learning_rate": 2.379110123267683e-05, "loss": 0.0301, "step": 3802, "task_loss": 0.011455537751317024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7834828638413729, "compression_loss": 0.0, "distillation_loss": 0.09096793085336685, "epoch": 3.61, "learning_rate": 2.3780458561922147e-05, "loss": 0.0944, "step": 3803, "task_loss": 0.12568344175815582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783493582294094, "compression_loss": 0.0, "distillation_loss": 0.034135833382606506, "epoch": 3.61, "learning_rate": 2.3769816112703047e-05, "loss": 0.0383, "step": 3804, "task_loss": 0.07552683353424072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835042961088001, "compression_loss": 0.0, "distillation_loss": 0.021872466430068016, "epoch": 3.61, "learning_rate": 2.375917388695277e-05, "loss": 0.0203, "step": 3805, "task_loss": 0.005825823172926903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835150052864951, "compression_loss": 0.0, "distillation_loss": 0.10276195406913757, "epoch": 3.61, "learning_rate": 2.3748531886604537e-05, "loss": 0.1085, "step": 3806, "task_loss": 0.15964293479919434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835257098281824, "compression_loss": 0.0, "distillation_loss": 0.0308642890304327, "epoch": 3.62, "learning_rate": 2.3737890113591507e-05, "loss": 0.0476, "step": 3807, "task_loss": 0.1985035240650177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835364097348657, "compression_loss": 0.0, "distillation_loss": 0.037669505923986435, "epoch": 3.62, "learning_rate": 2.372724856984682e-05, "loss": 0.0412, "step": 3808, "task_loss": 0.07280071079730988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783547105007549, "compression_loss": 0.0, "distillation_loss": 0.09298324584960938, "epoch": 3.62, "learning_rate": 2.3716607257303563e-05, "loss": 0.0889, "step": 3809, "task_loss": 0.051655180752277374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835577956472356, "compression_loss": 0.0, "distillation_loss": 0.11761534959077835, "epoch": 3.62, "learning_rate": 2.370596617789476e-05, "loss": 0.1226, "step": 3810, "task_loss": 0.1676085889339447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835684816549293, "compression_loss": 0.0, "distillation_loss": 0.11634726822376251, "epoch": 3.62, "learning_rate": 2.369532533355345e-05, "loss": 0.1063, "step": 3811, "task_loss": 0.01633966900408268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783579163031634, "compression_loss": 0.0, "distillation_loss": 0.09321273863315582, "epoch": 3.62, "learning_rate": 2.3684684726212574e-05, "loss": 0.0988, "step": 3812, "task_loss": 0.14873237907886505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7835898397783531, "compression_loss": 0.0, "distillation_loss": 0.0937819555401802, "epoch": 3.62, "learning_rate": 2.3674044357805058e-05, "loss": 0.0936, "step": 3813, "task_loss": 0.0917680636048317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836005118960904, "compression_loss": 0.0, "distillation_loss": 0.13557323813438416, "epoch": 3.62, "learning_rate": 2.3663404230263764e-05, "loss": 0.1304, "step": 3814, "task_loss": 0.08434079587459564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836111793858495, "compression_loss": 0.0, "distillation_loss": 0.11180786788463593, "epoch": 3.62, "learning_rate": 2.3652764345521527e-05, "loss": 0.1161, "step": 3815, "task_loss": 0.15484829246997833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836218422486342, "compression_loss": 0.0, "distillation_loss": 0.03923030570149422, "epoch": 3.62, "learning_rate": 2.364212470551114e-05, "loss": 0.0417, "step": 3816, "task_loss": 0.06417812407016754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836325004854481, "compression_loss": 0.0, "distillation_loss": 0.06536407768726349, "epoch": 3.62, "learning_rate": 2.3631485312165337e-05, "loss": 0.0612, "step": 3817, "task_loss": 0.023885080590844154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783643154097295, "compression_loss": 0.0, "distillation_loss": 0.2747918665409088, "epoch": 3.63, "learning_rate": 2.3620846167416816e-05, "loss": 0.2686, "step": 3818, "task_loss": 0.21282097697257996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836538030851784, "compression_loss": 0.0, "distillation_loss": 0.03365989029407501, "epoch": 3.63, "learning_rate": 2.3610207273198224e-05, "loss": 0.0308, "step": 3819, "task_loss": 0.005554560571908951 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836644474501022, "compression_loss": 0.0, "distillation_loss": 0.015433688648045063, "epoch": 3.63, "learning_rate": 2.359956863144219e-05, "loss": 0.0143, "step": 3820, "task_loss": 0.004542894661426544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836750871930699, "compression_loss": 0.0, "distillation_loss": 0.08182269334793091, "epoch": 3.63, "learning_rate": 2.358893024408125e-05, "loss": 0.0829, "step": 3821, "task_loss": 0.09263917803764343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836857223150853, "compression_loss": 0.0, "distillation_loss": 0.010741151869297028, "epoch": 3.63, "learning_rate": 2.3578292113047927e-05, "loss": 0.0144, "step": 3822, "task_loss": 0.0472935251891613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7836963528171519, "compression_loss": 0.0, "distillation_loss": 0.10976468026638031, "epoch": 3.63, "learning_rate": 2.3567654240274694e-05, "loss": 0.1043, "step": 3823, "task_loss": 0.05536213517189026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837069787002735, "compression_loss": 0.0, "distillation_loss": 0.03287689387798309, "epoch": 3.63, "learning_rate": 2.355701662769396e-05, "loss": 0.0514, "step": 3824, "task_loss": 0.21781839430332184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837175999654539, "compression_loss": 0.0, "distillation_loss": 0.03763230890035629, "epoch": 3.63, "learning_rate": 2.3546379277238107e-05, "loss": 0.0392, "step": 3825, "task_loss": 0.05362531542778015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837282166136966, "compression_loss": 0.0, "distillation_loss": 0.05240745097398758, "epoch": 3.63, "learning_rate": 2.3535742190839464e-05, "loss": 0.0651, "step": 3826, "task_loss": 0.17899471521377563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837388286460054, "compression_loss": 0.0, "distillation_loss": 0.03421130031347275, "epoch": 3.63, "learning_rate": 2.3525105370430296e-05, "loss": 0.0388, "step": 3827, "task_loss": 0.07988087832927704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783749436063384, "compression_loss": 0.0, "distillation_loss": 0.036636196076869965, "epoch": 3.64, "learning_rate": 2.351446881794284e-05, "loss": 0.0541, "step": 3828, "task_loss": 0.21091753244400024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837600388668359, "compression_loss": 0.0, "distillation_loss": 0.029964663088321686, "epoch": 3.64, "learning_rate": 2.3503832535309282e-05, "loss": 0.032, "step": 3829, "task_loss": 0.05079011991620064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837706370573649, "compression_loss": 0.0, "distillation_loss": 0.03737621754407883, "epoch": 3.64, "learning_rate": 2.3493196524461754e-05, "loss": 0.0481, "step": 3830, "task_loss": 0.14465513825416565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837812306359748, "compression_loss": 0.0, "distillation_loss": 0.05270380899310112, "epoch": 3.64, "learning_rate": 2.3482560787332325e-05, "loss": 0.052, "step": 3831, "task_loss": 0.04599842429161072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7837918196036691, "compression_loss": 0.0, "distillation_loss": 0.09582822024822235, "epoch": 3.64, "learning_rate": 2.3471925325853043e-05, "loss": 0.0926, "step": 3832, "task_loss": 0.06387031823396683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838024039614515, "compression_loss": 0.0, "distillation_loss": 0.04903242364525795, "epoch": 3.64, "learning_rate": 2.3461290141955886e-05, "loss": 0.0461, "step": 3833, "task_loss": 0.019668804481625557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838129837103258, "compression_loss": 0.0, "distillation_loss": 0.03492574021220207, "epoch": 3.64, "learning_rate": 2.345065523757278e-05, "loss": 0.0341, "step": 3834, "task_loss": 0.02714421972632408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838235588512955, "compression_loss": 0.0, "distillation_loss": 0.08957400172948837, "epoch": 3.64, "learning_rate": 2.3440020614635618e-05, "loss": 0.0918, "step": 3835, "task_loss": 0.11195338517427444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838341293853646, "compression_loss": 0.0, "distillation_loss": 0.029562341049313545, "epoch": 3.64, "learning_rate": 2.342938627507621e-05, "loss": 0.0338, "step": 3836, "task_loss": 0.07171028107404709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838446953135364, "compression_loss": 0.0, "distillation_loss": 0.022509558126330376, "epoch": 3.64, "learning_rate": 2.3418752220826364e-05, "loss": 0.0208, "step": 3837, "task_loss": 0.004994381219148636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838552566368148, "compression_loss": 0.0, "distillation_loss": 0.16286085546016693, "epoch": 3.64, "learning_rate": 2.3408118453817786e-05, "loss": 0.1811, "step": 3838, "task_loss": 0.3450527787208557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838658133562034, "compression_loss": 0.0, "distillation_loss": 0.09071379154920578, "epoch": 3.65, "learning_rate": 2.339748497598216e-05, "loss": 0.0975, "step": 3839, "task_loss": 0.1586238443851471 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.783876365472706, "compression_loss": 0.0, "distillation_loss": 0.06195370852947235, "epoch": 3.65, "learning_rate": 2.338685178925111e-05, "loss": 0.0588, "step": 3840, "task_loss": 0.030472123995423317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838869129873262, "compression_loss": 0.0, "distillation_loss": 0.12223900854587555, "epoch": 3.65, "learning_rate": 2.3376218895556196e-05, "loss": 0.1351, "step": 3841, "task_loss": 0.2507644295692444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7838974559010676, "compression_loss": 0.0, "distillation_loss": 0.18645763397216797, "epoch": 3.65, "learning_rate": 2.3365586296828944e-05, "loss": 0.1835, "step": 3842, "task_loss": 0.15729686617851257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839079942149341, "compression_loss": 0.0, "distillation_loss": 0.14159414172172546, "epoch": 3.65, "learning_rate": 2.335495399500081e-05, "loss": 0.1349, "step": 3843, "task_loss": 0.07418064028024673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839185279299291, "compression_loss": 0.0, "distillation_loss": 0.04566461965441704, "epoch": 3.65, "learning_rate": 2.3344321992003206e-05, "loss": 0.0428, "step": 3844, "task_loss": 0.017158547416329384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839290570470565, "compression_loss": 0.0, "distillation_loss": 0.15318387746810913, "epoch": 3.65, "learning_rate": 2.3333690289767477e-05, "loss": 0.1488, "step": 3845, "task_loss": 0.10943318903446198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839395815673199, "compression_loss": 0.0, "distillation_loss": 0.026680761948227882, "epoch": 3.65, "learning_rate": 2.3323058890224938e-05, "loss": 0.0248, "step": 3846, "task_loss": 0.008314013481140137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839501014917231, "compression_loss": 0.0, "distillation_loss": 0.03555554896593094, "epoch": 3.65, "learning_rate": 2.3312427795306835e-05, "loss": 0.0487, "step": 3847, "task_loss": 0.16655853390693665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839606168212695, "compression_loss": 0.0, "distillation_loss": 0.03203408420085907, "epoch": 3.65, "learning_rate": 2.330179700694434e-05, "loss": 0.044, "step": 3848, "task_loss": 0.15183991193771362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839711275569631, "compression_loss": 0.0, "distillation_loss": 0.047503869980573654, "epoch": 3.66, "learning_rate": 2.329116652706861e-05, "loss": 0.0442, "step": 3849, "task_loss": 0.01422363892197609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839816336998073, "compression_loss": 0.0, "distillation_loss": 0.04679963365197182, "epoch": 3.66, "learning_rate": 2.3280536357610704e-05, "loss": 0.0571, "step": 3850, "task_loss": 0.1500503420829773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7839921352508061, "compression_loss": 0.0, "distillation_loss": 0.020564064383506775, "epoch": 3.66, "learning_rate": 2.3269906500501647e-05, "loss": 0.0253, "step": 3851, "task_loss": 0.06842130422592163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840026322109629, "compression_loss": 0.0, "distillation_loss": 0.08808699995279312, "epoch": 3.66, "learning_rate": 2.325927695767241e-05, "loss": 0.0912, "step": 3852, "task_loss": 0.11908883601427078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840131245812815, "compression_loss": 0.0, "distillation_loss": 0.036189496517181396, "epoch": 3.66, "learning_rate": 2.3248647731053894e-05, "loss": 0.0553, "step": 3853, "task_loss": 0.22739224135875702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840236123627656, "compression_loss": 0.0, "distillation_loss": 0.05552596598863602, "epoch": 3.66, "learning_rate": 2.3238018822576947e-05, "loss": 0.0509, "step": 3854, "task_loss": 0.009532701224088669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840340955564189, "compression_loss": 0.0, "distillation_loss": 0.11583777517080307, "epoch": 3.66, "learning_rate": 2.3227390234172372e-05, "loss": 0.11, "step": 3855, "task_loss": 0.05766517296433449 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840445741632449, "compression_loss": 0.0, "distillation_loss": 0.04743620380759239, "epoch": 3.66, "learning_rate": 2.32167619677709e-05, "loss": 0.0638, "step": 3856, "task_loss": 0.2115105539560318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840550481842475, "compression_loss": 0.0, "distillation_loss": 0.035333409905433655, "epoch": 3.66, "learning_rate": 2.3206134025303206e-05, "loss": 0.0433, "step": 3857, "task_loss": 0.11506626009941101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840655176204303, "compression_loss": 0.0, "distillation_loss": 0.06820636987686157, "epoch": 3.66, "learning_rate": 2.31955064086999e-05, "loss": 0.0815, "step": 3858, "task_loss": 0.20162644982337952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784075982472797, "compression_loss": 0.0, "distillation_loss": 0.11444197595119476, "epoch": 3.66, "learning_rate": 2.3184879119891552e-05, "loss": 0.1087, "step": 3859, "task_loss": 0.057123761624097824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840864427423512, "compression_loss": 0.0, "distillation_loss": 0.09845539927482605, "epoch": 3.67, "learning_rate": 2.317425216080865e-05, "loss": 0.111, "step": 3860, "task_loss": 0.22417478263378143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7840968984300967, "compression_loss": 0.0, "distillation_loss": 0.1071721538901329, "epoch": 3.67, "learning_rate": 2.3163625533381636e-05, "loss": 0.1044, "step": 3861, "task_loss": 0.0796838253736496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841073495370371, "compression_loss": 0.0, "distillation_loss": 0.023740757256746292, "epoch": 3.67, "learning_rate": 2.315299923954088e-05, "loss": 0.0288, "step": 3862, "task_loss": 0.07385114580392838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841177960641761, "compression_loss": 0.0, "distillation_loss": 0.04698867350816727, "epoch": 3.67, "learning_rate": 2.314237328121672e-05, "loss": 0.0501, "step": 3863, "task_loss": 0.07777606695890427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841282380125174, "compression_loss": 0.0, "distillation_loss": 0.03970280662178993, "epoch": 3.67, "learning_rate": 2.3131747660339394e-05, "loss": 0.0383, "step": 3864, "task_loss": 0.026047592982649803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841386753830647, "compression_loss": 0.0, "distillation_loss": 0.03670245781540871, "epoch": 3.67, "learning_rate": 2.3121122378839106e-05, "loss": 0.0432, "step": 3865, "task_loss": 0.10145539790391922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841491081768216, "compression_loss": 0.0, "distillation_loss": 0.028605319559574127, "epoch": 3.67, "learning_rate": 2.3110497438645987e-05, "loss": 0.0363, "step": 3866, "task_loss": 0.10599753260612488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841595363947919, "compression_loss": 0.0, "distillation_loss": 0.11348830163478851, "epoch": 3.67, "learning_rate": 2.3099872841690103e-05, "loss": 0.108, "step": 3867, "task_loss": 0.05827032029628754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841699600379791, "compression_loss": 0.0, "distillation_loss": 0.08587086945772171, "epoch": 3.67, "learning_rate": 2.308924858990147e-05, "loss": 0.0806, "step": 3868, "task_loss": 0.033285610377788544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841803791073871, "compression_loss": 0.0, "distillation_loss": 0.08816444873809814, "epoch": 3.67, "learning_rate": 2.3078624685210042e-05, "loss": 0.0914, "step": 3869, "task_loss": 0.12038688361644745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7841907936040196, "compression_loss": 0.0, "distillation_loss": 0.13361284136772156, "epoch": 3.68, "learning_rate": 2.3068001129545686e-05, "loss": 0.1276, "step": 3870, "task_loss": 0.07371234893798828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78420120352888, "compression_loss": 0.0, "distillation_loss": 0.033696770668029785, "epoch": 3.68, "learning_rate": 2.305737792483822e-05, "loss": 0.0399, "step": 3871, "task_loss": 0.09580248594284058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842116088829721, "compression_loss": 0.0, "distillation_loss": 0.023039262741804123, "epoch": 3.68, "learning_rate": 2.3046755073017416e-05, "loss": 0.0341, "step": 3872, "task_loss": 0.13404729962348938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842220096672998, "compression_loss": 0.0, "distillation_loss": 0.014469185844063759, "epoch": 3.68, "learning_rate": 2.303613257601297e-05, "loss": 0.0137, "step": 3873, "task_loss": 0.006705537438392639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842324058828666, "compression_loss": 0.0, "distillation_loss": 0.06789643317461014, "epoch": 3.68, "learning_rate": 2.302551043575449e-05, "loss": 0.0626, "step": 3874, "task_loss": 0.015251386910676956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784242797530676, "compression_loss": 0.0, "distillation_loss": 0.04347585514187813, "epoch": 3.68, "learning_rate": 2.301488865417155e-05, "loss": 0.0488, "step": 3875, "task_loss": 0.09680940955877304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842531846117321, "compression_loss": 0.0, "distillation_loss": 0.04327643662691116, "epoch": 3.68, "learning_rate": 2.3004267233193655e-05, "loss": 0.0394, "step": 3876, "task_loss": 0.004441501572728157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842635671270383, "compression_loss": 0.0, "distillation_loss": 0.04019487649202347, "epoch": 3.68, "learning_rate": 2.2993646174750217e-05, "loss": 0.0394, "step": 3877, "task_loss": 0.032011084258556366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842739450775983, "compression_loss": 0.0, "distillation_loss": 0.024311494082212448, "epoch": 3.68, "learning_rate": 2.2983025480770627e-05, "loss": 0.026, "step": 3878, "task_loss": 0.040784791111946106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842843184644157, "compression_loss": 0.0, "distillation_loss": 0.1018083393573761, "epoch": 3.68, "learning_rate": 2.297240515318416e-05, "loss": 0.106, "step": 3879, "task_loss": 0.14343193173408508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7842946872884945, "compression_loss": 0.0, "distillation_loss": 0.12023551762104034, "epoch": 3.68, "learning_rate": 2.2961785193920058e-05, "loss": 0.1204, "step": 3880, "task_loss": 0.1214599758386612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843050515508381, "compression_loss": 0.0, "distillation_loss": 0.16024965047836304, "epoch": 3.69, "learning_rate": 2.2951165604907497e-05, "loss": 0.1819, "step": 3881, "task_loss": 0.37690994143486023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843154112524503, "compression_loss": 0.0, "distillation_loss": 0.1132129579782486, "epoch": 3.69, "learning_rate": 2.2940546388075572e-05, "loss": 0.1066, "step": 3882, "task_loss": 0.04658997803926468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843257663943348, "compression_loss": 0.0, "distillation_loss": 0.08600252866744995, "epoch": 3.69, "learning_rate": 2.2929927545353323e-05, "loss": 0.0817, "step": 3883, "task_loss": 0.0432564802467823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843361169774952, "compression_loss": 0.0, "distillation_loss": 0.06563156843185425, "epoch": 3.69, "learning_rate": 2.2919309078669697e-05, "loss": 0.0658, "step": 3884, "task_loss": 0.06688161194324493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843464630029351, "compression_loss": 0.0, "distillation_loss": 0.0378117635846138, "epoch": 3.69, "learning_rate": 2.2908690989953598e-05, "loss": 0.0374, "step": 3885, "task_loss": 0.03329796344041824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843568044716585, "compression_loss": 0.0, "distillation_loss": 0.08495815098285675, "epoch": 3.69, "learning_rate": 2.2898073281133853e-05, "loss": 0.0777, "step": 3886, "task_loss": 0.01194603368639946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843671413846687, "compression_loss": 0.0, "distillation_loss": 0.03463613986968994, "epoch": 3.69, "learning_rate": 2.2887455954139217e-05, "loss": 0.0355, "step": 3887, "task_loss": 0.04286995902657509 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843774737429696, "compression_loss": 0.0, "distillation_loss": 0.05026715248823166, "epoch": 3.69, "learning_rate": 2.2876839010898377e-05, "loss": 0.0459, "step": 3888, "task_loss": 0.00638798251748085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843878015475649, "compression_loss": 0.0, "distillation_loss": 0.062215156853199005, "epoch": 3.69, "learning_rate": 2.286622245333996e-05, "loss": 0.0596, "step": 3889, "task_loss": 0.03652032092213631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7843981247994581, "compression_loss": 0.0, "distillation_loss": 0.041499149054288864, "epoch": 3.69, "learning_rate": 2.2855606283392516e-05, "loss": 0.0464, "step": 3890, "task_loss": 0.09033425897359848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844084434996532, "compression_loss": 0.0, "distillation_loss": 0.04093802720308304, "epoch": 3.7, "learning_rate": 2.2844990502984513e-05, "loss": 0.0507, "step": 3891, "task_loss": 0.13819225132465363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844187576491536, "compression_loss": 0.0, "distillation_loss": 0.09159182012081146, "epoch": 3.7, "learning_rate": 2.2834375114044375e-05, "loss": 0.0887, "step": 3892, "task_loss": 0.06316046416759491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844290672489631, "compression_loss": 0.0, "distillation_loss": 0.15046623349189758, "epoch": 3.7, "learning_rate": 2.2823760118500415e-05, "loss": 0.1502, "step": 3893, "task_loss": 0.14759182929992676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844393723000853, "compression_loss": 0.0, "distillation_loss": 0.018377002328634262, "epoch": 3.7, "learning_rate": 2.2813145518280914e-05, "loss": 0.0169, "step": 3894, "task_loss": 0.003946490585803986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784449672803524, "compression_loss": 0.0, "distillation_loss": 0.09543585032224655, "epoch": 3.7, "learning_rate": 2.2802531315314065e-05, "loss": 0.101, "step": 3895, "task_loss": 0.15144692361354828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844599687602828, "compression_loss": 0.0, "distillation_loss": 0.17009879648685455, "epoch": 3.7, "learning_rate": 2.279191751152798e-05, "loss": 0.1709, "step": 3896, "task_loss": 0.17769216001033783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844702601713653, "compression_loss": 0.0, "distillation_loss": 0.11712504923343658, "epoch": 3.7, "learning_rate": 2.2781304108850706e-05, "loss": 0.1132, "step": 3897, "task_loss": 0.07775254547595978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844805470377754, "compression_loss": 0.0, "distillation_loss": 0.04855842888355255, "epoch": 3.7, "learning_rate": 2.2770691109210235e-05, "loss": 0.0595, "step": 3898, "task_loss": 0.1582665592432022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7844908293605166, "compression_loss": 0.0, "distillation_loss": 0.07292292267084122, "epoch": 3.7, "learning_rate": 2.2760078514534462e-05, "loss": 0.0764, "step": 3899, "task_loss": 0.10792665183544159 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845011071405927, "compression_loss": 0.0, "distillation_loss": 0.10464300960302353, "epoch": 3.7, "learning_rate": 2.2749466326751213e-05, "loss": 0.1044, "step": 3900, "task_loss": 0.10204927623271942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845113803790073, "compression_loss": 0.0, "distillation_loss": 0.03131140023469925, "epoch": 3.7, "learning_rate": 2.273885454778824e-05, "loss": 0.0288, "step": 3901, "task_loss": 0.0059508830308914185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845216490767641, "compression_loss": 0.0, "distillation_loss": 0.050689488649368286, "epoch": 3.71, "learning_rate": 2.272824317957324e-05, "loss": 0.0619, "step": 3902, "task_loss": 0.16312363743782043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845319132348668, "compression_loss": 0.0, "distillation_loss": 0.11499884724617004, "epoch": 3.71, "learning_rate": 2.2717632224033796e-05, "loss": 0.1087, "step": 3903, "task_loss": 0.051606349647045135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845421728543192, "compression_loss": 0.0, "distillation_loss": 0.15508276224136353, "epoch": 3.71, "learning_rate": 2.2707021683097454e-05, "loss": 0.1501, "step": 3904, "task_loss": 0.10518581420183182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845524279361247, "compression_loss": 0.0, "distillation_loss": 0.0731472373008728, "epoch": 3.71, "learning_rate": 2.2696411558691672e-05, "loss": 0.072, "step": 3905, "task_loss": 0.06154020130634308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845626784812871, "compression_loss": 0.0, "distillation_loss": 0.08605273813009262, "epoch": 3.71, "learning_rate": 2.268580185274381e-05, "loss": 0.0926, "step": 3906, "task_loss": 0.15165117383003235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845729244908103, "compression_loss": 0.0, "distillation_loss": 0.020624032244086266, "epoch": 3.71, "learning_rate": 2.2675192567181197e-05, "loss": 0.019, "step": 3907, "task_loss": 0.004072193056344986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7845831659656977, "compression_loss": 0.0, "distillation_loss": 0.09648367762565613, "epoch": 3.71, "learning_rate": 2.2664583703931047e-05, "loss": 0.0974, "step": 3908, "task_loss": 0.10594139993190765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784593402906953, "compression_loss": 0.0, "distillation_loss": 0.03631186485290527, "epoch": 3.71, "learning_rate": 2.265397526492052e-05, "loss": 0.0487, "step": 3909, "task_loss": 0.16029351949691772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846036353155801, "compression_loss": 0.0, "distillation_loss": 0.10707633942365646, "epoch": 3.71, "learning_rate": 2.2643367252076684e-05, "loss": 0.1087, "step": 3910, "task_loss": 0.12293732911348343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846138631925825, "compression_loss": 0.0, "distillation_loss": 0.027988499030470848, "epoch": 3.71, "learning_rate": 2.263275966732653e-05, "loss": 0.0256, "step": 3911, "task_loss": 0.00423845648765564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784624086538964, "compression_loss": 0.0, "distillation_loss": 0.06167607381939888, "epoch": 3.72, "learning_rate": 2.262215251259699e-05, "loss": 0.0595, "step": 3912, "task_loss": 0.04003376513719559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846343053557281, "compression_loss": 0.0, "distillation_loss": 0.10655433684587479, "epoch": 3.72, "learning_rate": 2.2611545789814898e-05, "loss": 0.111, "step": 3913, "task_loss": 0.15064670145511627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846445196438786, "compression_loss": 0.0, "distillation_loss": 0.04435639828443527, "epoch": 3.72, "learning_rate": 2.2600939500907007e-05, "loss": 0.0445, "step": 3914, "task_loss": 0.04605961963534355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846547294044193, "compression_loss": 0.0, "distillation_loss": 0.08209598064422607, "epoch": 3.72, "learning_rate": 2.2590333647800013e-05, "loss": 0.0942, "step": 3915, "task_loss": 0.2029554843902588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846649346383536, "compression_loss": 0.0, "distillation_loss": 0.03713390976190567, "epoch": 3.72, "learning_rate": 2.2579728232420525e-05, "loss": 0.0419, "step": 3916, "task_loss": 0.08507491648197174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846751353466855, "compression_loss": 0.0, "distillation_loss": 0.08504173159599304, "epoch": 3.72, "learning_rate": 2.2569123256695056e-05, "loss": 0.0915, "step": 3917, "task_loss": 0.1496903896331787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846853315304185, "compression_loss": 0.0, "distillation_loss": 0.06336773186922073, "epoch": 3.72, "learning_rate": 2.2558518722550048e-05, "loss": 0.0811, "step": 3918, "task_loss": 0.2407415211200714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7846955231905562, "compression_loss": 0.0, "distillation_loss": 0.08145183324813843, "epoch": 3.72, "learning_rate": 2.2547914631911884e-05, "loss": 0.0866, "step": 3919, "task_loss": 0.1333352029323578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7847057103281025, "compression_loss": 0.0, "distillation_loss": 0.10730397701263428, "epoch": 3.72, "learning_rate": 2.2537310986706826e-05, "loss": 0.1128, "step": 3920, "task_loss": 0.16203323006629944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784715892944061, "compression_loss": 0.0, "distillation_loss": 0.041547950357198715, "epoch": 3.72, "learning_rate": 2.2526707788861098e-05, "loss": 0.0386, "step": 3921, "task_loss": 0.011634528636932373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7847260710394351, "compression_loss": 0.0, "distillation_loss": 0.06141505762934685, "epoch": 3.72, "learning_rate": 2.2516105040300804e-05, "loss": 0.0588, "step": 3922, "task_loss": 0.03478226065635681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784736244615229, "compression_loss": 0.0, "distillation_loss": 0.06806781888008118, "epoch": 3.73, "learning_rate": 2.250550274295199e-05, "loss": 0.0689, "step": 3923, "task_loss": 0.07671485841274261 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.784746413672446, "compression_loss": 0.0, "distillation_loss": 0.05792640894651413, "epoch": 3.73, "learning_rate": 2.2494900898740616e-05, "loss": 0.0609, "step": 3924, "task_loss": 0.08743541687726974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78475657821209, "compression_loss": 0.0, "distillation_loss": 0.04159487783908844, "epoch": 3.73, "learning_rate": 2.2484299509592563e-05, "loss": 0.0539, "step": 3925, "task_loss": 0.16499102115631104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7847667382351645, "compression_loss": 0.0, "distillation_loss": 0.038866378366947174, "epoch": 3.73, "learning_rate": 2.247369857743362e-05, "loss": 0.0449, "step": 3926, "task_loss": 0.0994512215256691 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7847768937426732, "compression_loss": 0.0, "distillation_loss": 0.14317655563354492, "epoch": 3.73, "learning_rate": 2.2463098104189497e-05, "loss": 0.1352, "step": 3927, "task_loss": 0.06382149457931519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78478704473562, "compression_loss": 0.0, "distillation_loss": 0.06667514145374298, "epoch": 3.73, "learning_rate": 2.2452498091785825e-05, "loss": 0.0739, "step": 3928, "task_loss": 0.13881635665893555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7847971912150083, "compression_loss": 0.0, "distillation_loss": 0.015922335907816887, "epoch": 3.73, "learning_rate": 2.244189854214814e-05, "loss": 0.0232, "step": 3929, "task_loss": 0.08892001956701279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848073331818419, "compression_loss": 0.0, "distillation_loss": 0.12270977348089218, "epoch": 3.73, "learning_rate": 2.24312994572019e-05, "loss": 0.1286, "step": 3930, "task_loss": 0.1818765252828598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848174706371246, "compression_loss": 0.0, "distillation_loss": 0.09202725440263748, "epoch": 3.73, "learning_rate": 2.2420700838872493e-05, "loss": 0.0922, "step": 3931, "task_loss": 0.09422404319047928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78482760358186, "compression_loss": 0.0, "distillation_loss": 0.22878801822662354, "epoch": 3.73, "learning_rate": 2.2410102689085185e-05, "loss": 0.2324, "step": 3932, "task_loss": 0.2648504376411438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848377320170516, "compression_loss": 0.0, "distillation_loss": 0.03465723991394043, "epoch": 3.74, "learning_rate": 2.2399505009765214e-05, "loss": 0.0383, "step": 3933, "task_loss": 0.07061752676963806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848478559437033, "compression_loss": 0.0, "distillation_loss": 0.046556901186704636, "epoch": 3.74, "learning_rate": 2.2388907802837676e-05, "loss": 0.0425, "step": 3934, "task_loss": 0.005850574001669884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848579753628188, "compression_loss": 0.0, "distillation_loss": 0.02604757435619831, "epoch": 3.74, "learning_rate": 2.2378311070227616e-05, "loss": 0.0313, "step": 3935, "task_loss": 0.07850679755210876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848680902754016, "compression_loss": 0.0, "distillation_loss": 0.032080747187137604, "epoch": 3.74, "learning_rate": 2.2367714813859967e-05, "loss": 0.0366, "step": 3936, "task_loss": 0.07736522704362869 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848782006824555, "compression_loss": 0.0, "distillation_loss": 0.030702613294124603, "epoch": 3.74, "learning_rate": 2.23571190356596e-05, "loss": 0.0332, "step": 3937, "task_loss": 0.05548100546002388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848883065849842, "compression_loss": 0.0, "distillation_loss": 0.06845727562904358, "epoch": 3.74, "learning_rate": 2.2346523737551296e-05, "loss": 0.0701, "step": 3938, "task_loss": 0.08478248119354248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7848984079839914, "compression_loss": 0.0, "distillation_loss": 0.030870838090777397, "epoch": 3.74, "learning_rate": 2.233592892145973e-05, "loss": 0.0387, "step": 3939, "task_loss": 0.10916754603385925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849085048804807, "compression_loss": 0.0, "distillation_loss": 0.1040521040558815, "epoch": 3.74, "learning_rate": 2.23253345893095e-05, "loss": 0.1096, "step": 3940, "task_loss": 0.15946507453918457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849185972754558, "compression_loss": 0.0, "distillation_loss": 0.04144272953271866, "epoch": 3.74, "learning_rate": 2.231474074302513e-05, "loss": 0.051, "step": 3941, "task_loss": 0.13734087347984314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849286851699203, "compression_loss": 0.0, "distillation_loss": 0.11080522835254669, "epoch": 3.74, "learning_rate": 2.2304147384531038e-05, "loss": 0.115, "step": 3942, "task_loss": 0.15283827483654022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849387685648781, "compression_loss": 0.0, "distillation_loss": 0.07673658430576324, "epoch": 3.74, "learning_rate": 2.2293554515751552e-05, "loss": 0.08, "step": 3943, "task_loss": 0.10954262316226959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849488474613326, "compression_loss": 0.0, "distillation_loss": 0.13339099287986755, "epoch": 3.75, "learning_rate": 2.2282962138610924e-05, "loss": 0.1443, "step": 3944, "task_loss": 0.2422405332326889 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849589218602878, "compression_loss": 0.0, "distillation_loss": 0.05010555312037468, "epoch": 3.75, "learning_rate": 2.2272370255033314e-05, "loss": 0.058, "step": 3945, "task_loss": 0.1292436569929123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849689917627473, "compression_loss": 0.0, "distillation_loss": 0.04166126996278763, "epoch": 3.75, "learning_rate": 2.226177886694278e-05, "loss": 0.0422, "step": 3946, "task_loss": 0.04674288257956505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849790571697145, "compression_loss": 0.0, "distillation_loss": 0.05979571491479874, "epoch": 3.75, "learning_rate": 2.22511879762633e-05, "loss": 0.0623, "step": 3947, "task_loss": 0.08474962413311005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849891180821934, "compression_loss": 0.0, "distillation_loss": 0.09109986573457718, "epoch": 3.75, "learning_rate": 2.2240597584918768e-05, "loss": 0.0975, "step": 3948, "task_loss": 0.155453622341156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7849991745011875, "compression_loss": 0.0, "distillation_loss": 0.07806719839572906, "epoch": 3.75, "learning_rate": 2.2230007694832963e-05, "loss": 0.0722, "step": 3949, "task_loss": 0.018949225544929504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850092264277005, "compression_loss": 0.0, "distillation_loss": 0.03633013740181923, "epoch": 3.75, "learning_rate": 2.2219418307929607e-05, "loss": 0.0392, "step": 3950, "task_loss": 0.06507201492786407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850192738627363, "compression_loss": 0.0, "distillation_loss": 0.029751278460025787, "epoch": 3.75, "learning_rate": 2.2208829426132307e-05, "loss": 0.0389, "step": 3951, "task_loss": 0.12085320800542831 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850293168072984, "compression_loss": 0.0, "distillation_loss": 0.056585509330034256, "epoch": 3.75, "learning_rate": 2.219824105136459e-05, "loss": 0.0607, "step": 3952, "task_loss": 0.0976976677775383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850393552623904, "compression_loss": 0.0, "distillation_loss": 0.029133249074220657, "epoch": 3.75, "learning_rate": 2.218765318554987e-05, "loss": 0.0329, "step": 3953, "task_loss": 0.06695835292339325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850493892290161, "compression_loss": 0.0, "distillation_loss": 0.04859749227762222, "epoch": 3.75, "learning_rate": 2.2177065830611498e-05, "loss": 0.046, "step": 3954, "task_loss": 0.022643616423010826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850594187081792, "compression_loss": 0.0, "distillation_loss": 0.0373307503759861, "epoch": 3.76, "learning_rate": 2.2166478988472716e-05, "loss": 0.0352, "step": 3955, "task_loss": 0.015915043652057648 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850694437008833, "compression_loss": 0.0, "distillation_loss": 0.05053357779979706, "epoch": 3.76, "learning_rate": 2.215589266105667e-05, "loss": 0.0505, "step": 3956, "task_loss": 0.049804605543613434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850794642081321, "compression_loss": 0.0, "distillation_loss": 0.016781002283096313, "epoch": 3.76, "learning_rate": 2.2145306850286424e-05, "loss": 0.0155, "step": 3957, "task_loss": 0.00426999107003212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850894802309294, "compression_loss": 0.0, "distillation_loss": 0.06677426397800446, "epoch": 3.76, "learning_rate": 2.2134721558084917e-05, "loss": 0.0677, "step": 3958, "task_loss": 0.07622140645980835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7850994917702788, "compression_loss": 0.0, "distillation_loss": 0.031092319637537003, "epoch": 3.76, "learning_rate": 2.2124136786375057e-05, "loss": 0.0348, "step": 3959, "task_loss": 0.06810992956161499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785109498827184, "compression_loss": 0.0, "distillation_loss": 0.04789033159613609, "epoch": 3.76, "learning_rate": 2.2113552537079597e-05, "loss": 0.0456, "step": 3960, "task_loss": 0.025089185684919357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851195014026485, "compression_loss": 0.0, "distillation_loss": 0.03125907480716705, "epoch": 3.76, "learning_rate": 2.2102968812121218e-05, "loss": 0.0292, "step": 3961, "task_loss": 0.011070974171161652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851294994976762, "compression_loss": 0.0, "distillation_loss": 0.0916096568107605, "epoch": 3.76, "learning_rate": 2.209238561342251e-05, "loss": 0.0876, "step": 3962, "task_loss": 0.05148671194911003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851394931132707, "compression_loss": 0.0, "distillation_loss": 0.17331336438655853, "epoch": 3.76, "learning_rate": 2.2081802942905955e-05, "loss": 0.1803, "step": 3963, "task_loss": 0.24269163608551025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851494822504358, "compression_loss": 0.0, "distillation_loss": 0.022669488564133644, "epoch": 3.76, "learning_rate": 2.2071220802493954e-05, "loss": 0.0208, "step": 3964, "task_loss": 0.0038123298436403275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785159466910175, "compression_loss": 0.0, "distillation_loss": 0.07016949355602264, "epoch": 3.77, "learning_rate": 2.2060639194108794e-05, "loss": 0.0671, "step": 3965, "task_loss": 0.039382204413414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785169447093492, "compression_loss": 0.0, "distillation_loss": 0.05281013250350952, "epoch": 3.77, "learning_rate": 2.2050058119672677e-05, "loss": 0.0531, "step": 3966, "task_loss": 0.05565192550420761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851794228013906, "compression_loss": 0.0, "distillation_loss": 0.1318967640399933, "epoch": 3.77, "learning_rate": 2.2039477581107714e-05, "loss": 0.1256, "step": 3967, "task_loss": 0.06887546926736832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851893940348745, "compression_loss": 0.0, "distillation_loss": 0.0660499557852745, "epoch": 3.77, "learning_rate": 2.2028897580335906e-05, "loss": 0.0626, "step": 3968, "task_loss": 0.03172999620437622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7851993607949473, "compression_loss": 0.0, "distillation_loss": 0.03410178795456886, "epoch": 3.77, "learning_rate": 2.2018318119279168e-05, "loss": 0.0312, "step": 3969, "task_loss": 0.004658637568354607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852093230826127, "compression_loss": 0.0, "distillation_loss": 0.16602616012096405, "epoch": 3.77, "learning_rate": 2.2007739199859294e-05, "loss": 0.169, "step": 3970, "task_loss": 0.19587615132331848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852192808988743, "compression_loss": 0.0, "distillation_loss": 0.05224110186100006, "epoch": 3.77, "learning_rate": 2.1997160823998016e-05, "loss": 0.0481, "step": 3971, "task_loss": 0.01118180900812149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852292342447359, "compression_loss": 0.0, "distillation_loss": 0.055276624858379364, "epoch": 3.77, "learning_rate": 2.1986582993616926e-05, "loss": 0.0502, "step": 3972, "task_loss": 0.004159906879067421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852391831212011, "compression_loss": 0.0, "distillation_loss": 0.05159646272659302, "epoch": 3.77, "learning_rate": 2.1976005710637554e-05, "loss": 0.0558, "step": 3973, "task_loss": 0.09320049732923508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852491275292737, "compression_loss": 0.0, "distillation_loss": 0.10250347852706909, "epoch": 3.77, "learning_rate": 2.196542897698131e-05, "loss": 0.1108, "step": 3974, "task_loss": 0.185111865401268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852590674699573, "compression_loss": 0.0, "distillation_loss": 0.04058488458395004, "epoch": 3.77, "learning_rate": 2.1954852794569493e-05, "loss": 0.0404, "step": 3975, "task_loss": 0.03915491700172424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852690029442555, "compression_loss": 0.0, "distillation_loss": 0.012665364891290665, "epoch": 3.78, "learning_rate": 2.194427716532334e-05, "loss": 0.0117, "step": 3976, "task_loss": 0.0031289253383874893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852789339531722, "compression_loss": 0.0, "distillation_loss": 0.07077420502901077, "epoch": 3.78, "learning_rate": 2.193370209116396e-05, "loss": 0.0676, "step": 3977, "task_loss": 0.038839004933834076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852888604977108, "compression_loss": 0.0, "distillation_loss": 0.039995912462472916, "epoch": 3.78, "learning_rate": 2.1923127574012365e-05, "loss": 0.0426, "step": 3978, "task_loss": 0.06648865342140198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7852987825788752, "compression_loss": 0.0, "distillation_loss": 0.06262241303920746, "epoch": 3.78, "learning_rate": 2.1912553615789462e-05, "loss": 0.0587, "step": 3979, "task_loss": 0.023578649386763573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853087001976691, "compression_loss": 0.0, "distillation_loss": 0.05083940178155899, "epoch": 3.78, "learning_rate": 2.190198021841606e-05, "loss": 0.0518, "step": 3980, "task_loss": 0.05996212735772133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853186133550959, "compression_loss": 0.0, "distillation_loss": 0.0958191305398941, "epoch": 3.78, "learning_rate": 2.189140738381288e-05, "loss": 0.0944, "step": 3981, "task_loss": 0.08185603469610214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853285220521596, "compression_loss": 0.0, "distillation_loss": 0.1051722839474678, "epoch": 3.78, "learning_rate": 2.188083511390051e-05, "loss": 0.1112, "step": 3982, "task_loss": 0.1649647355079651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853384262898637, "compression_loss": 0.0, "distillation_loss": 0.10239297151565552, "epoch": 3.78, "learning_rate": 2.1870263410599464e-05, "loss": 0.1246, "step": 3983, "task_loss": 0.3248556852340698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785348326069212, "compression_loss": 0.0, "distillation_loss": 0.04012586921453476, "epoch": 3.78, "learning_rate": 2.185969227583014e-05, "loss": 0.0377, "step": 3984, "task_loss": 0.015774773433804512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853582213912081, "compression_loss": 0.0, "distillation_loss": 0.019525595009326935, "epoch": 3.78, "learning_rate": 2.1849121711512847e-05, "loss": 0.0185, "step": 3985, "task_loss": 0.009618887677788734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853681122568558, "compression_loss": 0.0, "distillation_loss": 0.11984270811080933, "epoch": 3.79, "learning_rate": 2.1838551719567767e-05, "loss": 0.1232, "step": 3986, "task_loss": 0.1538366973400116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853779986671586, "compression_loss": 0.0, "distillation_loss": 0.10699717700481415, "epoch": 3.79, "learning_rate": 2.1827982301914993e-05, "loss": 0.1009, "step": 3987, "task_loss": 0.04600843787193298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853878806231203, "compression_loss": 0.0, "distillation_loss": 0.018371235579252243, "epoch": 3.79, "learning_rate": 2.1817413460474514e-05, "loss": 0.0415, "step": 3988, "task_loss": 0.2499411404132843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7853977581257445, "compression_loss": 0.0, "distillation_loss": 0.07233263552188873, "epoch": 3.79, "learning_rate": 2.18068451971662e-05, "loss": 0.0799, "step": 3989, "task_loss": 0.14828746020793915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785407631176035, "compression_loss": 0.0, "distillation_loss": 0.04718535393476486, "epoch": 3.79, "learning_rate": 2.1796277513909838e-05, "loss": 0.0454, "step": 3990, "task_loss": 0.029316851869225502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854174997749954, "compression_loss": 0.0, "distillation_loss": 0.03916257619857788, "epoch": 3.79, "learning_rate": 2.1785710412625098e-05, "loss": 0.0363, "step": 3991, "task_loss": 0.010277681052684784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854273639236293, "compression_loss": 0.0, "distillation_loss": 0.179625004529953, "epoch": 3.79, "learning_rate": 2.1775143895231533e-05, "loss": 0.1678, "step": 3992, "task_loss": 0.0614137165248394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854372236229407, "compression_loss": 0.0, "distillation_loss": 0.11892496049404144, "epoch": 3.79, "learning_rate": 2.1764577963648614e-05, "loss": 0.1157, "step": 3993, "task_loss": 0.08710844814777374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854470788739328, "compression_loss": 0.0, "distillation_loss": 0.04435303807258606, "epoch": 3.79, "learning_rate": 2.175401261979569e-05, "loss": 0.0626, "step": 3994, "task_loss": 0.2271975576877594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854569296776097, "compression_loss": 0.0, "distillation_loss": 0.04650871828198433, "epoch": 3.79, "learning_rate": 2.1743447865592016e-05, "loss": 0.0441, "step": 3995, "task_loss": 0.022587845101952553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785466776034975, "compression_loss": 0.0, "distillation_loss": 0.09561876952648163, "epoch": 3.79, "learning_rate": 2.173288370295671e-05, "loss": 0.0919, "step": 3996, "task_loss": 0.058294668793678284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854766179470322, "compression_loss": 0.0, "distillation_loss": 0.054196953773498535, "epoch": 3.8, "learning_rate": 2.1722320133808815e-05, "loss": 0.0555, "step": 3997, "task_loss": 0.06700082123279572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854864554147851, "compression_loss": 0.0, "distillation_loss": 0.09690621495246887, "epoch": 3.8, "learning_rate": 2.171175716006726e-05, "loss": 0.1012, "step": 3998, "task_loss": 0.14022237062454224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7854962884392374, "compression_loss": 0.0, "distillation_loss": 0.04198819026350975, "epoch": 3.8, "learning_rate": 2.1701194783650846e-05, "loss": 0.0455, "step": 3999, "task_loss": 0.07720094919204712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855061170213927, "compression_loss": 0.0, "distillation_loss": 0.12908399105072021, "epoch": 3.8, "learning_rate": 2.1690633006478293e-05, "loss": 0.1321, "step": 4000, "task_loss": 0.15927539765834808 }, { "epoch": 3.8, "eval_accuracy": 0.8944954128440367, "eval_loss": 0.41921311616897583, "eval_runtime": 18.4061, "eval_samples_per_second": 47.376, "eval_steps_per_second": 5.922, "step": 4000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855159411622548, "compression_loss": 0.0, "distillation_loss": 0.0658789873123169, "epoch": 3.8, "learning_rate": 2.1680071830468178e-05, "loss": 0.0634, "step": 4001, "task_loss": 0.04087566211819649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855257608628273, "compression_loss": 0.0, "distillation_loss": 0.06824058294296265, "epoch": 3.8, "learning_rate": 2.166951125753902e-05, "loss": 0.0721, "step": 4002, "task_loss": 0.10674886405467987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855355761241138, "compression_loss": 0.0, "distillation_loss": 0.12798132002353668, "epoch": 3.8, "learning_rate": 2.1658951289609174e-05, "loss": 0.1246, "step": 4003, "task_loss": 0.09367252886295319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855453869471183, "compression_loss": 0.0, "distillation_loss": 0.15844711661338806, "epoch": 3.8, "learning_rate": 2.1648391928596917e-05, "loss": 0.1684, "step": 4004, "task_loss": 0.25770193338394165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855551933328442, "compression_loss": 0.0, "distillation_loss": 0.029653755947947502, "epoch": 3.8, "learning_rate": 2.1637833176420417e-05, "loss": 0.0273, "step": 4005, "task_loss": 0.005788305774331093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855649952822952, "compression_loss": 0.0, "distillation_loss": 0.07723814994096756, "epoch": 3.8, "learning_rate": 2.1627275034997704e-05, "loss": 0.074, "step": 4006, "task_loss": 0.04511658847332001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785574792796475, "compression_loss": 0.0, "distillation_loss": 0.03552757203578949, "epoch": 3.81, "learning_rate": 2.161671750624673e-05, "loss": 0.0364, "step": 4007, "task_loss": 0.04422738030552864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855845858763874, "compression_loss": 0.0, "distillation_loss": 0.02493412420153618, "epoch": 3.81, "learning_rate": 2.160616059208531e-05, "loss": 0.0286, "step": 4008, "task_loss": 0.06140220910310745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7855943745230358, "compression_loss": 0.0, "distillation_loss": 0.024118445813655853, "epoch": 3.81, "learning_rate": 2.1595604294431158e-05, "loss": 0.041, "step": 4009, "task_loss": 0.19306635856628418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856041587374243, "compression_loss": 0.0, "distillation_loss": 0.06833194196224213, "epoch": 3.81, "learning_rate": 2.1585048615201885e-05, "loss": 0.0668, "step": 4010, "task_loss": 0.05287961661815643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856139385205563, "compression_loss": 0.0, "distillation_loss": 0.08782090991735458, "epoch": 3.81, "learning_rate": 2.1574493556314983e-05, "loss": 0.1022, "step": 4011, "task_loss": 0.23139870166778564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856237138734354, "compression_loss": 0.0, "distillation_loss": 0.03765169531106949, "epoch": 3.81, "learning_rate": 2.1563939119687828e-05, "loss": 0.0461, "step": 4012, "task_loss": 0.12255912274122238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856334847970656, "compression_loss": 0.0, "distillation_loss": 0.047839343547821045, "epoch": 3.81, "learning_rate": 2.155338530723767e-05, "loss": 0.0597, "step": 4013, "task_loss": 0.16615822911262512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856432512924504, "compression_loss": 0.0, "distillation_loss": 0.0834595113992691, "epoch": 3.81, "learning_rate": 2.154283212088168e-05, "loss": 0.0868, "step": 4014, "task_loss": 0.11699174344539642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856530133605933, "compression_loss": 0.0, "distillation_loss": 0.028070103377103806, "epoch": 3.81, "learning_rate": 2.1532279562536874e-05, "loss": 0.0308, "step": 4015, "task_loss": 0.05518289655447006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856627710024984, "compression_loss": 0.0, "distillation_loss": 0.019197819754481316, "epoch": 3.81, "learning_rate": 2.1521727634120192e-05, "loss": 0.0184, "step": 4016, "task_loss": 0.011533919721841812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785672524219169, "compression_loss": 0.0, "distillation_loss": 0.028306379914283752, "epoch": 3.81, "learning_rate": 2.151117633754844e-05, "loss": 0.0421, "step": 4017, "task_loss": 0.1661168932914734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856822730116091, "compression_loss": 0.0, "distillation_loss": 0.026535285636782646, "epoch": 3.82, "learning_rate": 2.150062567473829e-05, "loss": 0.0298, "step": 4018, "task_loss": 0.058951202780008316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7856920173808221, "compression_loss": 0.0, "distillation_loss": 0.040127944201231, "epoch": 3.82, "learning_rate": 2.1490075647606363e-05, "loss": 0.0386, "step": 4019, "task_loss": 0.025223994627594948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857017573278118, "compression_loss": 0.0, "distillation_loss": 0.02355261519551277, "epoch": 3.82, "learning_rate": 2.1479526258069087e-05, "loss": 0.0352, "step": 4020, "task_loss": 0.13954126834869385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857114928535818, "compression_loss": 0.0, "distillation_loss": 0.03306501358747482, "epoch": 3.82, "learning_rate": 2.1468977508042824e-05, "loss": 0.0479, "step": 4021, "task_loss": 0.1817673295736313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785721223959136, "compression_loss": 0.0, "distillation_loss": 0.06848086416721344, "epoch": 3.82, "learning_rate": 2.14584293994438e-05, "loss": 0.0723, "step": 4022, "task_loss": 0.10678940266370773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857309506454778, "compression_loss": 0.0, "distillation_loss": 0.03855840116739273, "epoch": 3.82, "learning_rate": 2.1447881934188134e-05, "loss": 0.0358, "step": 4023, "task_loss": 0.010923238471150398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857406729136112, "compression_loss": 0.0, "distillation_loss": 0.021052051335573196, "epoch": 3.82, "learning_rate": 2.143733511419183e-05, "loss": 0.0193, "step": 4024, "task_loss": 0.003061491996049881 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857503907645396, "compression_loss": 0.0, "distillation_loss": 0.10116279125213623, "epoch": 3.82, "learning_rate": 2.142678894137075e-05, "loss": 0.1047, "step": 4025, "task_loss": 0.1363639235496521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857601041992668, "compression_loss": 0.0, "distillation_loss": 0.19848263263702393, "epoch": 3.82, "learning_rate": 2.1416243417640668e-05, "loss": 0.1934, "step": 4026, "task_loss": 0.14759844541549683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857698132187965, "compression_loss": 0.0, "distillation_loss": 0.10897897183895111, "epoch": 3.82, "learning_rate": 2.1405698544917225e-05, "loss": 0.1032, "step": 4027, "task_loss": 0.051134128123521805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857795178241324, "compression_loss": 0.0, "distillation_loss": 0.06620095670223236, "epoch": 3.83, "learning_rate": 2.139515432511596e-05, "loss": 0.0624, "step": 4028, "task_loss": 0.0285488348454237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785789218016278, "compression_loss": 0.0, "distillation_loss": 0.1283935308456421, "epoch": 3.83, "learning_rate": 2.138461076015227e-05, "loss": 0.1225, "step": 4029, "task_loss": 0.0697251558303833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7857989137962372, "compression_loss": 0.0, "distillation_loss": 0.05554177612066269, "epoch": 3.83, "learning_rate": 2.1374067851941445e-05, "loss": 0.0517, "step": 4030, "task_loss": 0.017079105600714684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858086051650136, "compression_loss": 0.0, "distillation_loss": 0.015999089926481247, "epoch": 3.83, "learning_rate": 2.1363525602398666e-05, "loss": 0.0233, "step": 4031, "task_loss": 0.08854260295629501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785818292123611, "compression_loss": 0.0, "distillation_loss": 0.12333665788173676, "epoch": 3.83, "learning_rate": 2.1352984013438964e-05, "loss": 0.1226, "step": 4032, "task_loss": 0.11570969223976135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858279746730328, "compression_loss": 0.0, "distillation_loss": 0.015158230438828468, "epoch": 3.83, "learning_rate": 2.134244308697729e-05, "loss": 0.0222, "step": 4033, "task_loss": 0.08555779606103897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858376528142829, "compression_loss": 0.0, "distillation_loss": 0.06008541211485863, "epoch": 3.83, "learning_rate": 2.133190282492844e-05, "loss": 0.064, "step": 4034, "task_loss": 0.09927615523338318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858473265483649, "compression_loss": 0.0, "distillation_loss": 0.1671043038368225, "epoch": 3.83, "learning_rate": 2.1321363229207096e-05, "loss": 0.1689, "step": 4035, "task_loss": 0.1853921115398407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858569958762825, "compression_loss": 0.0, "distillation_loss": 0.026313871145248413, "epoch": 3.83, "learning_rate": 2.131082430172785e-05, "loss": 0.0243, "step": 4036, "task_loss": 0.006411956623196602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858666607990394, "compression_loss": 0.0, "distillation_loss": 0.07281780242919922, "epoch": 3.83, "learning_rate": 2.1300286044405135e-05, "loss": 0.0689, "step": 4037, "task_loss": 0.033516231924295425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858763213176394, "compression_loss": 0.0, "distillation_loss": 0.04494217038154602, "epoch": 3.83, "learning_rate": 2.1289748459153283e-05, "loss": 0.0421, "step": 4038, "task_loss": 0.016057439148426056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.785885977433086, "compression_loss": 0.0, "distillation_loss": 0.02288595214486122, "epoch": 3.84, "learning_rate": 2.1279211547886485e-05, "loss": 0.0211, "step": 4039, "task_loss": 0.005246007815003395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7858956291463829, "compression_loss": 0.0, "distillation_loss": 0.041702110320329666, "epoch": 3.84, "learning_rate": 2.1268675312518833e-05, "loss": 0.0515, "step": 4040, "task_loss": 0.1393231451511383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859052764585338, "compression_loss": 0.0, "distillation_loss": 0.034420765936374664, "epoch": 3.84, "learning_rate": 2.1258139754964283e-05, "loss": 0.0314, "step": 4041, "task_loss": 0.004395313560962677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859149193705425, "compression_loss": 0.0, "distillation_loss": 0.057688407599925995, "epoch": 3.84, "learning_rate": 2.124760487713666e-05, "loss": 0.0542, "step": 4042, "task_loss": 0.022908154875040054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859245578834124, "compression_loss": 0.0, "distillation_loss": 0.06689159572124481, "epoch": 3.84, "learning_rate": 2.1237070680949686e-05, "loss": 0.0685, "step": 4043, "task_loss": 0.08265194296836853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859341919981475, "compression_loss": 0.0, "distillation_loss": 0.03234408050775528, "epoch": 3.84, "learning_rate": 2.122653716831694e-05, "loss": 0.0304, "step": 4044, "task_loss": 0.012456223368644714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859438217157514, "compression_loss": 0.0, "distillation_loss": 0.046421635895967484, "epoch": 3.84, "learning_rate": 2.12160043411519e-05, "loss": 0.0521, "step": 4045, "task_loss": 0.10356031358242035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859534470372276, "compression_loss": 0.0, "distillation_loss": 0.26599907875061035, "epoch": 3.84, "learning_rate": 2.120547220136789e-05, "loss": 0.2447, "step": 4046, "task_loss": 0.053442008793354034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78596306796358, "compression_loss": 0.0, "distillation_loss": 0.06889566034078598, "epoch": 3.84, "learning_rate": 2.1194940750878132e-05, "loss": 0.0723, "step": 4047, "task_loss": 0.10272115468978882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859726844958121, "compression_loss": 0.0, "distillation_loss": 0.03883613273501396, "epoch": 3.84, "learning_rate": 2.1184409991595713e-05, "loss": 0.0432, "step": 4048, "task_loss": 0.0823683887720108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859822966349278, "compression_loss": 0.0, "distillation_loss": 0.0397968553006649, "epoch": 3.85, "learning_rate": 2.117387992543359e-05, "loss": 0.0434, "step": 4049, "task_loss": 0.07577399909496307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7859919043819307, "compression_loss": 0.0, "distillation_loss": 0.17865076661109924, "epoch": 3.85, "learning_rate": 2.1163350554304613e-05, "loss": 0.1735, "step": 4050, "task_loss": 0.12746202945709229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860015077378243, "compression_loss": 0.0, "distillation_loss": 0.02501235529780388, "epoch": 3.85, "learning_rate": 2.1152821880121482e-05, "loss": 0.0232, "step": 4051, "task_loss": 0.007309248670935631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860111067036125, "compression_loss": 0.0, "distillation_loss": 0.07480224221944809, "epoch": 3.85, "learning_rate": 2.1142293904796783e-05, "loss": 0.0842, "step": 4052, "task_loss": 0.16905774176120758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860207012802989, "compression_loss": 0.0, "distillation_loss": 0.04464121162891388, "epoch": 3.85, "learning_rate": 2.1131766630242966e-05, "loss": 0.0478, "step": 4053, "task_loss": 0.07629342377185822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860302914688873, "compression_loss": 0.0, "distillation_loss": 0.023911893367767334, "epoch": 3.85, "learning_rate": 2.112124005837238e-05, "loss": 0.0219, "step": 4054, "task_loss": 0.004040185362100601 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786039877270381, "compression_loss": 0.0, "distillation_loss": 0.017198091372847557, "epoch": 3.85, "learning_rate": 2.1110714191097222e-05, "loss": 0.0159, "step": 4055, "task_loss": 0.004315854981541634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860494586857841, "compression_loss": 0.0, "distillation_loss": 0.09507577121257782, "epoch": 3.85, "learning_rate": 2.1100189030329558e-05, "loss": 0.0988, "step": 4056, "task_loss": 0.1326824277639389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860590357161003, "compression_loss": 0.0, "distillation_loss": 0.03281257301568985, "epoch": 3.85, "learning_rate": 2.108966457798134e-05, "loss": 0.0358, "step": 4057, "task_loss": 0.06237972527742386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860686083623328, "compression_loss": 0.0, "distillation_loss": 0.09298193454742432, "epoch": 3.85, "learning_rate": 2.107914083596438e-05, "loss": 0.1066, "step": 4058, "task_loss": 0.22964733839035034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860781766254858, "compression_loss": 0.0, "distillation_loss": 0.10477671027183533, "epoch": 3.85, "learning_rate": 2.106861780619037e-05, "loss": 0.0998, "step": 4059, "task_loss": 0.054976824671030045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860877405065628, "compression_loss": 0.0, "distillation_loss": 0.025005143135786057, "epoch": 3.86, "learning_rate": 2.105809549057088e-05, "loss": 0.0288, "step": 4060, "task_loss": 0.06275138258934021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7860973000065674, "compression_loss": 0.0, "distillation_loss": 0.06488427519798279, "epoch": 3.86, "learning_rate": 2.1047573891017306e-05, "loss": 0.0644, "step": 4061, "task_loss": 0.06053687259554863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861068551265034, "compression_loss": 0.0, "distillation_loss": 0.13458393514156342, "epoch": 3.86, "learning_rate": 2.103705300944099e-05, "loss": 0.1369, "step": 4062, "task_loss": 0.15822012722492218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861164058673743, "compression_loss": 0.0, "distillation_loss": 0.18830344080924988, "epoch": 3.86, "learning_rate": 2.1026532847753068e-05, "loss": 0.181, "step": 4063, "task_loss": 0.11519521474838257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786125952230184, "compression_loss": 0.0, "distillation_loss": 0.044180721044540405, "epoch": 3.86, "learning_rate": 2.1016013407864605e-05, "loss": 0.0402, "step": 4064, "task_loss": 0.004225835204124451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861354942159361, "compression_loss": 0.0, "distillation_loss": 0.03563191741704941, "epoch": 3.86, "learning_rate": 2.1005494691686482e-05, "loss": 0.0328, "step": 4065, "task_loss": 0.007603077217936516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861450318256342, "compression_loss": 0.0, "distillation_loss": 0.024195533245801926, "epoch": 3.86, "learning_rate": 2.0994976701129488e-05, "loss": 0.0302, "step": 4066, "task_loss": 0.08473625034093857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861545650602821, "compression_loss": 0.0, "distillation_loss": 0.02827250212430954, "epoch": 3.86, "learning_rate": 2.098445943810427e-05, "loss": 0.026, "step": 4067, "task_loss": 0.005987679585814476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861640939208835, "compression_loss": 0.0, "distillation_loss": 0.026403294876217842, "epoch": 3.86, "learning_rate": 2.0973942904521328e-05, "loss": 0.0253, "step": 4068, "task_loss": 0.0155414380133152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786173618408442, "compression_loss": 0.0, "distillation_loss": 0.11837053298950195, "epoch": 3.86, "learning_rate": 2.096342710229105e-05, "loss": 0.1148, "step": 4069, "task_loss": 0.08244297653436661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861831385239613, "compression_loss": 0.0, "distillation_loss": 0.04006524384021759, "epoch": 3.87, "learning_rate": 2.0952912033323672e-05, "loss": 0.0377, "step": 4070, "task_loss": 0.016373053193092346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7861926542684451, "compression_loss": 0.0, "distillation_loss": 0.0364631786942482, "epoch": 3.87, "learning_rate": 2.0942397699529325e-05, "loss": 0.0337, "step": 4071, "task_loss": 0.008914249017834663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786202165642897, "compression_loss": 0.0, "distillation_loss": 0.1414714902639389, "epoch": 3.87, "learning_rate": 2.0931884102817973e-05, "loss": 0.1359, "step": 4072, "task_loss": 0.08563140779733658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862116726483208, "compression_loss": 0.0, "distillation_loss": 0.0371423065662384, "epoch": 3.87, "learning_rate": 2.0921371245099463e-05, "loss": 0.0401, "step": 4073, "task_loss": 0.06720311939716339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862211752857201, "compression_loss": 0.0, "distillation_loss": 0.2344522476196289, "epoch": 3.87, "learning_rate": 2.0910859128283517e-05, "loss": 0.2293, "step": 4074, "task_loss": 0.18332399427890778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862306735560987, "compression_loss": 0.0, "distillation_loss": 0.042274147272109985, "epoch": 3.87, "learning_rate": 2.0900347754279698e-05, "loss": 0.0531, "step": 4075, "task_loss": 0.15049678087234497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.78624016746046, "compression_loss": 0.0, "distillation_loss": 0.045464303344488144, "epoch": 3.87, "learning_rate": 2.088983712499745e-05, "loss": 0.043, "step": 4076, "task_loss": 0.02061353251338005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786249656999808, "compression_loss": 0.0, "distillation_loss": 0.13638713955879211, "epoch": 3.87, "learning_rate": 2.0879327242346093e-05, "loss": 0.132, "step": 4077, "task_loss": 0.09256385266780853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862591421751463, "compression_loss": 0.0, "distillation_loss": 0.07257235050201416, "epoch": 3.87, "learning_rate": 2.0868818108234783e-05, "loss": 0.078, "step": 4078, "task_loss": 0.12710356712341309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862686229874785, "compression_loss": 0.0, "distillation_loss": 0.1785210520029068, "epoch": 3.87, "learning_rate": 2.0858309724572554e-05, "loss": 0.1706, "step": 4079, "task_loss": 0.09951774030923843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862780994378082, "compression_loss": 0.0, "distillation_loss": 0.04436881095170975, "epoch": 3.87, "learning_rate": 2.084780209326831e-05, "loss": 0.0416, "step": 4080, "task_loss": 0.016401471570134163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862875715271395, "compression_loss": 0.0, "distillation_loss": 0.026207074522972107, "epoch": 3.88, "learning_rate": 2.0837295216230826e-05, "loss": 0.0242, "step": 4081, "task_loss": 0.006385818123817444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7862970392564755, "compression_loss": 0.0, "distillation_loss": 0.07042695581912994, "epoch": 3.88, "learning_rate": 2.0826789095368705e-05, "loss": 0.0771, "step": 4082, "task_loss": 0.1371847689151764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863065026268203, "compression_loss": 0.0, "distillation_loss": 0.09259558469057083, "epoch": 3.88, "learning_rate": 2.081628373259044e-05, "loss": 0.097, "step": 4083, "task_loss": 0.13651143014431 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863159616391775, "compression_loss": 0.0, "distillation_loss": 0.1024775579571724, "epoch": 3.88, "learning_rate": 2.0805779129804397e-05, "loss": 0.0936, "step": 4084, "task_loss": 0.01337103545665741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863254162945507, "compression_loss": 0.0, "distillation_loss": 0.0833493024110794, "epoch": 3.88, "learning_rate": 2.0795275288918763e-05, "loss": 0.0871, "step": 4085, "task_loss": 0.12135336548089981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863348665939435, "compression_loss": 0.0, "distillation_loss": 0.03871447220444679, "epoch": 3.88, "learning_rate": 2.0784772211841624e-05, "loss": 0.0447, "step": 4086, "task_loss": 0.09859400987625122 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863443125383598, "compression_loss": 0.0, "distillation_loss": 0.03221891075372696, "epoch": 3.88, "learning_rate": 2.077426990048091e-05, "loss": 0.0301, "step": 4087, "task_loss": 0.011213263496756554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863537541288032, "compression_loss": 0.0, "distillation_loss": 0.02990700677037239, "epoch": 3.88, "learning_rate": 2.0763768356744428e-05, "loss": 0.0282, "step": 4088, "task_loss": 0.012575274333357811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863631913662773, "compression_loss": 0.0, "distillation_loss": 0.042817167937755585, "epoch": 3.88, "learning_rate": 2.075326758253982e-05, "loss": 0.0542, "step": 4089, "task_loss": 0.15655487775802612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863726242517859, "compression_loss": 0.0, "distillation_loss": 0.050644587725400925, "epoch": 3.88, "learning_rate": 2.0742767579774615e-05, "loss": 0.0495, "step": 4090, "task_loss": 0.03952382132411003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7863820527863326, "compression_loss": 0.0, "distillation_loss": 0.10132403671741486, "epoch": 3.89, "learning_rate": 2.073226835035618e-05, "loss": 0.105, "step": 4091, "task_loss": 0.13759130239486694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786391476970921, "compression_loss": 0.0, "distillation_loss": 0.05861423537135124, "epoch": 3.89, "learning_rate": 2.0721769896191752e-05, "loss": 0.066, "step": 4092, "task_loss": 0.13297078013420105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864008968065551, "compression_loss": 0.0, "distillation_loss": 0.02992338128387928, "epoch": 3.89, "learning_rate": 2.0711272219188423e-05, "loss": 0.0456, "step": 4093, "task_loss": 0.18631944060325623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864103122942382, "compression_loss": 0.0, "distillation_loss": 0.10860427469015121, "epoch": 3.89, "learning_rate": 2.0700775321253158e-05, "loss": 0.1143, "step": 4094, "task_loss": 0.16594167053699493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864197234349742, "compression_loss": 0.0, "distillation_loss": 0.08554205298423767, "epoch": 3.89, "learning_rate": 2.0690279204292753e-05, "loss": 0.1151, "step": 4095, "task_loss": 0.3806484341621399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864291302297668, "compression_loss": 0.0, "distillation_loss": 0.0441209115087986, "epoch": 3.89, "learning_rate": 2.0679783870213883e-05, "loss": 0.0408, "step": 4096, "task_loss": 0.011142965406179428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864385326796196, "compression_loss": 0.0, "distillation_loss": 0.05388110131025314, "epoch": 3.89, "learning_rate": 2.0669289320923086e-05, "loss": 0.049, "step": 4097, "task_loss": 0.004679244011640549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864479307855363, "compression_loss": 0.0, "distillation_loss": 0.033240221440792084, "epoch": 3.89, "learning_rate": 2.0658795558326743e-05, "loss": 0.031, "step": 4098, "task_loss": 0.011169865727424622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864573245485206, "compression_loss": 0.0, "distillation_loss": 0.06061680614948273, "epoch": 3.89, "learning_rate": 2.0648302584331092e-05, "loss": 0.0627, "step": 4099, "task_loss": 0.08101128786802292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864667139695761, "compression_loss": 0.0, "distillation_loss": 0.12393200397491455, "epoch": 3.89, "learning_rate": 2.0637810400842233e-05, "loss": 0.1224, "step": 4100, "task_loss": 0.10840637236833572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864760990497066, "compression_loss": 0.0, "distillation_loss": 0.05604955554008484, "epoch": 3.89, "learning_rate": 2.0627319009766127e-05, "loss": 0.0707, "step": 4101, "task_loss": 0.20234975218772888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864854797899157, "compression_loss": 0.0, "distillation_loss": 0.03791045770049095, "epoch": 3.9, "learning_rate": 2.0616828413008578e-05, "loss": 0.0351, "step": 4102, "task_loss": 0.009459014981985092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7864948561912071, "compression_loss": 0.0, "distillation_loss": 0.01791505143046379, "epoch": 3.9, "learning_rate": 2.0606338612475264e-05, "loss": 0.0255, "step": 4103, "task_loss": 0.09366492927074432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865042282545845, "compression_loss": 0.0, "distillation_loss": 0.14737428724765778, "epoch": 3.9, "learning_rate": 2.0595849610071697e-05, "loss": 0.1548, "step": 4104, "task_loss": 0.22147542238235474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865135959810516, "compression_loss": 0.0, "distillation_loss": 0.04210761934518814, "epoch": 3.9, "learning_rate": 2.058536140770325e-05, "loss": 0.0384, "step": 4105, "task_loss": 0.004892520606517792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865229593716121, "compression_loss": 0.0, "distillation_loss": 0.03413204848766327, "epoch": 3.9, "learning_rate": 2.057487400727517e-05, "loss": 0.0316, "step": 4106, "task_loss": 0.008931750431656837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865323184272696, "compression_loss": 0.0, "distillation_loss": 0.014508235268294811, "epoch": 3.9, "learning_rate": 2.0564387410692544e-05, "loss": 0.0136, "step": 4107, "task_loss": 0.00587865523993969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865416731490278, "compression_loss": 0.0, "distillation_loss": 0.1767938733100891, "epoch": 3.9, "learning_rate": 2.0553901619860306e-05, "loss": 0.1712, "step": 4108, "task_loss": 0.1211482584476471 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865510235378904, "compression_loss": 0.0, "distillation_loss": 0.11559472978115082, "epoch": 3.9, "learning_rate": 2.0543416636683246e-05, "loss": 0.1049, "step": 4109, "task_loss": 0.008573394268751144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865603695948611, "compression_loss": 0.0, "distillation_loss": 0.046687278896570206, "epoch": 3.9, "learning_rate": 2.0532932463066023e-05, "loss": 0.0461, "step": 4110, "task_loss": 0.04126298427581787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865697113209436, "compression_loss": 0.0, "distillation_loss": 0.06493042409420013, "epoch": 3.9, "learning_rate": 2.052244910091313e-05, "loss": 0.0873, "step": 4111, "task_loss": 0.28819745779037476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865790487171415, "compression_loss": 0.0, "distillation_loss": 0.10494603216648102, "epoch": 3.91, "learning_rate": 2.051196655212892e-05, "loss": 0.0967, "step": 4112, "task_loss": 0.022337011992931366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865883817844586, "compression_loss": 0.0, "distillation_loss": 0.12292291969060898, "epoch": 3.91, "learning_rate": 2.0501484818617594e-05, "loss": 0.1399, "step": 4113, "task_loss": 0.29296374320983887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7865977105238985, "compression_loss": 0.0, "distillation_loss": 0.043324559926986694, "epoch": 3.91, "learning_rate": 2.0491003902283225e-05, "loss": 0.0394, "step": 4114, "task_loss": 0.004396416246891022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866070349364648, "compression_loss": 0.0, "distillation_loss": 0.05673076957464218, "epoch": 3.91, "learning_rate": 2.048052380502971e-05, "loss": 0.0565, "step": 4115, "task_loss": 0.054648954421281815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866163550231614, "compression_loss": 0.0, "distillation_loss": 0.023540731519460678, "epoch": 3.91, "learning_rate": 2.047004452876081e-05, "loss": 0.0216, "step": 4116, "task_loss": 0.004315689206123352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866256707849918, "compression_loss": 0.0, "distillation_loss": 0.09178947657346725, "epoch": 3.91, "learning_rate": 2.045956607538015e-05, "loss": 0.0978, "step": 4117, "task_loss": 0.15143872797489166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866349822229597, "compression_loss": 0.0, "distillation_loss": 0.2266932725906372, "epoch": 3.91, "learning_rate": 2.0449088446791165e-05, "loss": 0.2216, "step": 4118, "task_loss": 0.17611849308013916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866442893380688, "compression_loss": 0.0, "distillation_loss": 0.027620844542980194, "epoch": 3.91, "learning_rate": 2.043861164489719e-05, "loss": 0.0354, "step": 4119, "task_loss": 0.1055789515376091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786653592131323, "compression_loss": 0.0, "distillation_loss": 0.0539475753903389, "epoch": 3.91, "learning_rate": 2.0428135671601373e-05, "loss": 0.0601, "step": 4120, "task_loss": 0.11543746292591095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866628906037256, "compression_loss": 0.0, "distillation_loss": 0.047250404953956604, "epoch": 3.91, "learning_rate": 2.041766052880673e-05, "loss": 0.0593, "step": 4121, "task_loss": 0.167374387383461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866721847562806, "compression_loss": 0.0, "distillation_loss": 0.05512014403939247, "epoch": 3.91, "learning_rate": 2.0407186218416114e-05, "loss": 0.0648, "step": 4122, "task_loss": 0.15170332789421082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7866814745899915, "compression_loss": 0.0, "distillation_loss": 0.03062654659152031, "epoch": 3.92, "learning_rate": 2.039671274233225e-05, "loss": 0.0388, "step": 4123, "task_loss": 0.11201991140842438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786690760105862, "compression_loss": 0.0, "distillation_loss": 0.01795804500579834, "epoch": 3.92, "learning_rate": 2.0386240102457682e-05, "loss": 0.0166, "step": 4124, "task_loss": 0.004647407680749893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867000413048959, "compression_loss": 0.0, "distillation_loss": 0.08352886140346527, "epoch": 3.92, "learning_rate": 2.0375768300694824e-05, "loss": 0.0783, "step": 4125, "task_loss": 0.030838267877697945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867093181880968, "compression_loss": 0.0, "distillation_loss": 0.16173683106899261, "epoch": 3.92, "learning_rate": 2.0365297338945917e-05, "loss": 0.1537, "step": 4126, "task_loss": 0.08173392713069916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867185907564682, "compression_loss": 0.0, "distillation_loss": 0.05559203773736954, "epoch": 3.92, "learning_rate": 2.035482721911308e-05, "loss": 0.0688, "step": 4127, "task_loss": 0.18805257976055145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867278590110142, "compression_loss": 0.0, "distillation_loss": 0.03582768887281418, "epoch": 3.92, "learning_rate": 2.034435794309824e-05, "loss": 0.0388, "step": 4128, "task_loss": 0.06604026257991791 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867371229527381, "compression_loss": 0.0, "distillation_loss": 0.042852409183979034, "epoch": 3.92, "learning_rate": 2.0333889512803204e-05, "loss": 0.0475, "step": 4129, "task_loss": 0.08887660503387451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867463825826438, "compression_loss": 0.0, "distillation_loss": 0.10272371768951416, "epoch": 3.92, "learning_rate": 2.0323421930129617e-05, "loss": 0.0932, "step": 4130, "task_loss": 0.007061410695314407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867556379017349, "compression_loss": 0.0, "distillation_loss": 0.15910032391548157, "epoch": 3.92, "learning_rate": 2.031295519697895e-05, "loss": 0.1638, "step": 4131, "task_loss": 0.2062944769859314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786764888911015, "compression_loss": 0.0, "distillation_loss": 0.15020066499710083, "epoch": 3.92, "learning_rate": 2.0302489315252545e-05, "loss": 0.17, "step": 4132, "task_loss": 0.3483584523200989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786774135611488, "compression_loss": 0.0, "distillation_loss": 0.07658859342336655, "epoch": 3.92, "learning_rate": 2.0292024286851584e-05, "loss": 0.0723, "step": 4133, "task_loss": 0.03387470170855522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867833780041574, "compression_loss": 0.0, "distillation_loss": 0.06731578707695007, "epoch": 3.93, "learning_rate": 2.0281560113677086e-05, "loss": 0.0683, "step": 4134, "task_loss": 0.07722228765487671 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7867926160900269, "compression_loss": 0.0, "distillation_loss": 0.03083537518978119, "epoch": 3.93, "learning_rate": 2.0271096797629915e-05, "loss": 0.0292, "step": 4135, "task_loss": 0.014749417081475258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868018498701004, "compression_loss": 0.0, "distillation_loss": 0.055131688714027405, "epoch": 3.93, "learning_rate": 2.0260634340610786e-05, "loss": 0.0589, "step": 4136, "task_loss": 0.09324628859758377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868110793453812, "compression_loss": 0.0, "distillation_loss": 0.06215892732143402, "epoch": 3.93, "learning_rate": 2.0250172744520258e-05, "loss": 0.0583, "step": 4137, "task_loss": 0.023460306227207184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868203045168732, "compression_loss": 0.0, "distillation_loss": 0.11791975051164627, "epoch": 3.93, "learning_rate": 2.023971201125872e-05, "loss": 0.1233, "step": 4138, "task_loss": 0.17152175307273865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868295253855802, "compression_loss": 0.0, "distillation_loss": 0.03688070923089981, "epoch": 3.93, "learning_rate": 2.0229252142726415e-05, "loss": 0.0461, "step": 4139, "task_loss": 0.12953397631645203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868387419525057, "compression_loss": 0.0, "distillation_loss": 0.059014152735471725, "epoch": 3.93, "learning_rate": 2.021879314082344e-05, "loss": 0.0663, "step": 4140, "task_loss": 0.13181355595588684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868479542186534, "compression_loss": 0.0, "distillation_loss": 0.31541213393211365, "epoch": 3.93, "learning_rate": 2.0208335007449726e-05, "loss": 0.3036, "step": 4141, "task_loss": 0.19743433594703674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786857162185027, "compression_loss": 0.0, "distillation_loss": 0.029475336894392967, "epoch": 3.93, "learning_rate": 2.019787774450503e-05, "loss": 0.0269, "step": 4142, "task_loss": 0.004132760688662529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868663658526301, "compression_loss": 0.0, "distillation_loss": 0.13314858078956604, "epoch": 3.93, "learning_rate": 2.0187421353888966e-05, "loss": 0.1294, "step": 4143, "task_loss": 0.09587833285331726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868755652224667, "compression_loss": 0.0, "distillation_loss": 0.05543944984674454, "epoch": 3.94, "learning_rate": 2.0176965837500995e-05, "loss": 0.0528, "step": 4144, "task_loss": 0.029170924797654152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868847602955401, "compression_loss": 0.0, "distillation_loss": 0.031388137489557266, "epoch": 3.94, "learning_rate": 2.0166511197240405e-05, "loss": 0.0293, "step": 4145, "task_loss": 0.01011231541633606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7868939510728542, "compression_loss": 0.0, "distillation_loss": 0.1392134577035904, "epoch": 3.94, "learning_rate": 2.015605743500634e-05, "loss": 0.1419, "step": 4146, "task_loss": 0.16592922806739807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869031375554126, "compression_loss": 0.0, "distillation_loss": 0.04586111381649971, "epoch": 3.94, "learning_rate": 2.0145604552697763e-05, "loss": 0.0487, "step": 4147, "task_loss": 0.07397586107254028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.786912319744219, "compression_loss": 0.0, "distillation_loss": 0.030558858066797256, "epoch": 3.94, "learning_rate": 2.0135152552213493e-05, "loss": 0.0542, "step": 4148, "task_loss": 0.2674226462841034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869214976402772, "compression_loss": 0.0, "distillation_loss": 0.1152239590883255, "epoch": 3.94, "learning_rate": 2.0124701435452198e-05, "loss": 0.1043, "step": 4149, "task_loss": 0.005567222833633423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869306712445906, "compression_loss": 0.0, "distillation_loss": 0.15521375834941864, "epoch": 3.94, "learning_rate": 2.0114251204312367e-05, "loss": 0.1738, "step": 4150, "task_loss": 0.3411465287208557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869398405581631, "compression_loss": 0.0, "distillation_loss": 0.07683128863573074, "epoch": 3.94, "learning_rate": 2.010380186069234e-05, "loss": 0.0698, "step": 4151, "task_loss": 0.0065435003489255905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869490055819984, "compression_loss": 0.0, "distillation_loss": 0.07263028621673584, "epoch": 3.94, "learning_rate": 2.009335340649028e-05, "loss": 0.0779, "step": 4152, "task_loss": 0.12566472589969635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869581663171001, "compression_loss": 0.0, "distillation_loss": 0.10479018092155457, "epoch": 3.94, "learning_rate": 2.0082905843604206e-05, "loss": 0.1073, "step": 4153, "task_loss": 0.13012909889221191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869673227644719, "compression_loss": 0.0, "distillation_loss": 0.0787314772605896, "epoch": 3.94, "learning_rate": 2.0072459173931964e-05, "loss": 0.0745, "step": 4154, "task_loss": 0.03596143424510956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869764749251175, "compression_loss": 0.0, "distillation_loss": 0.02369462139904499, "epoch": 3.95, "learning_rate": 2.0062013399371245e-05, "loss": 0.022, "step": 4155, "task_loss": 0.007038429379463196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869856228000405, "compression_loss": 0.0, "distillation_loss": 0.018353287130594254, "epoch": 3.95, "learning_rate": 2.005156852181958e-05, "loss": 0.0253, "step": 4156, "task_loss": 0.08732615411281586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7869947663902447, "compression_loss": 0.0, "distillation_loss": 0.026407258585095406, "epoch": 3.95, "learning_rate": 2.004112454317431e-05, "loss": 0.0248, "step": 4157, "task_loss": 0.010559692978858948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870039056967336, "compression_loss": 0.0, "distillation_loss": 0.018740851432085037, "epoch": 3.95, "learning_rate": 2.003068146533266e-05, "loss": 0.0175, "step": 4158, "task_loss": 0.00584782287478447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870130407205111, "compression_loss": 0.0, "distillation_loss": 0.1747373789548874, "epoch": 3.95, "learning_rate": 2.002023929019165e-05, "loss": 0.1671, "step": 4159, "task_loss": 0.09856338798999786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870221714625809, "compression_loss": 0.0, "distillation_loss": 0.05840107798576355, "epoch": 3.95, "learning_rate": 2.0009798019648163e-05, "loss": 0.0601, "step": 4160, "task_loss": 0.07528725266456604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870312979239464, "compression_loss": 0.0, "distillation_loss": 0.052719514816999435, "epoch": 3.95, "learning_rate": 1.9999357655598893e-05, "loss": 0.0555, "step": 4161, "task_loss": 0.08041465282440186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870404201056116, "compression_loss": 0.0, "distillation_loss": 0.11436031758785248, "epoch": 3.95, "learning_rate": 1.9988918199940386e-05, "loss": 0.1258, "step": 4162, "task_loss": 0.22912541031837463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870495380085799, "compression_loss": 0.0, "distillation_loss": 0.027497505769133568, "epoch": 3.95, "learning_rate": 1.997847965456903e-05, "loss": 0.0335, "step": 4163, "task_loss": 0.08756634593009949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870586516338552, "compression_loss": 0.0, "distillation_loss": 0.12368052452802658, "epoch": 3.95, "learning_rate": 1.9968042021381023e-05, "loss": 0.1237, "step": 4164, "task_loss": 0.12429526448249817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870677609824411, "compression_loss": 0.0, "distillation_loss": 0.03644444793462753, "epoch": 3.96, "learning_rate": 1.9957605302272412e-05, "loss": 0.0463, "step": 4165, "task_loss": 0.13482612371444702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870768660553412, "compression_loss": 0.0, "distillation_loss": 0.1511649489402771, "epoch": 3.96, "learning_rate": 1.9947169499139083e-05, "loss": 0.1411, "step": 4166, "task_loss": 0.05041804164648056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870859668535594, "compression_loss": 0.0, "distillation_loss": 0.16785401105880737, "epoch": 3.96, "learning_rate": 1.9936734613876762e-05, "loss": 0.1752, "step": 4167, "task_loss": 0.24086761474609375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7870950633780992, "compression_loss": 0.0, "distillation_loss": 0.06589843332767487, "epoch": 3.96, "learning_rate": 1.9926300648380976e-05, "loss": 0.0679, "step": 4168, "task_loss": 0.08612485975027084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871041556299643, "compression_loss": 0.0, "distillation_loss": 0.1752784252166748, "epoch": 3.96, "learning_rate": 1.9915867604547106e-05, "loss": 0.168, "step": 4169, "task_loss": 0.10218894481658936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871132436101584, "compression_loss": 0.0, "distillation_loss": 0.06247349828481674, "epoch": 3.96, "learning_rate": 1.9905435484270383e-05, "loss": 0.0576, "step": 4170, "task_loss": 0.014096638187766075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871223273196852, "compression_loss": 0.0, "distillation_loss": 0.032126858830451965, "epoch": 3.96, "learning_rate": 1.989500428944583e-05, "loss": 0.0376, "step": 4171, "task_loss": 0.08727513253688812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871314067595484, "compression_loss": 0.0, "distillation_loss": 0.032654955983161926, "epoch": 3.96, "learning_rate": 1.988457402196834e-05, "loss": 0.0306, "step": 4172, "task_loss": 0.01211773045361042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871404819307517, "compression_loss": 0.0, "distillation_loss": 0.018787803128361702, "epoch": 3.96, "learning_rate": 1.9874144683732615e-05, "loss": 0.0173, "step": 4173, "task_loss": 0.0037378836423158646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871495528342987, "compression_loss": 0.0, "distillation_loss": 0.04686371237039566, "epoch": 3.96, "learning_rate": 1.9863716276633186e-05, "loss": 0.0444, "step": 4174, "task_loss": 0.021781545132398605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.787158619471193, "compression_loss": 0.0, "distillation_loss": 0.089274100959301, "epoch": 3.96, "learning_rate": 1.9853288802564438e-05, "loss": 0.0895, "step": 4175, "task_loss": 0.09111557900905609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871676818424386, "compression_loss": 0.0, "distillation_loss": 0.0693182647228241, "epoch": 3.97, "learning_rate": 1.9842862263420564e-05, "loss": 0.0683, "step": 4176, "task_loss": 0.05941478908061981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871767399490389, "compression_loss": 0.0, "distillation_loss": 0.02392822504043579, "epoch": 3.97, "learning_rate": 1.9832436661095604e-05, "loss": 0.0366, "step": 4177, "task_loss": 0.15036530792713165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871857937919977, "compression_loss": 0.0, "distillation_loss": 0.07844184339046478, "epoch": 3.97, "learning_rate": 1.98220119974834e-05, "loss": 0.0762, "step": 4178, "task_loss": 0.0561903640627861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7871948433723186, "compression_loss": 0.0, "distillation_loss": 0.08809831738471985, "epoch": 3.97, "learning_rate": 1.9811588274477665e-05, "loss": 0.0907, "step": 4179, "task_loss": 0.11436465382575989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872038886910053, "compression_loss": 0.0, "distillation_loss": 0.02092152088880539, "epoch": 3.97, "learning_rate": 1.980116549397191e-05, "loss": 0.0242, "step": 4180, "task_loss": 0.05349248647689819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872129297490617, "compression_loss": 0.0, "distillation_loss": 0.10855552554130554, "epoch": 3.97, "learning_rate": 1.979074365785947e-05, "loss": 0.104, "step": 4181, "task_loss": 0.06298065930604935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872219665474911, "compression_loss": 0.0, "distillation_loss": 0.07886943221092224, "epoch": 3.97, "learning_rate": 1.978032276803354e-05, "loss": 0.0749, "step": 4182, "task_loss": 0.03924179822206497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872309990872974, "compression_loss": 0.0, "distillation_loss": 0.024403858929872513, "epoch": 3.97, "learning_rate": 1.9769902826387106e-05, "loss": 0.0229, "step": 4183, "task_loss": 0.008915219455957413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872400273694844, "compression_loss": 0.0, "distillation_loss": 0.08649304509162903, "epoch": 3.97, "learning_rate": 1.9759483834813023e-05, "loss": 0.0885, "step": 4184, "task_loss": 0.10659407079219818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872490513950555, "compression_loss": 0.0, "distillation_loss": 0.030071692541241646, "epoch": 3.97, "learning_rate": 1.9749065795203938e-05, "loss": 0.0468, "step": 4185, "task_loss": 0.19697120785713196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872580711650146, "compression_loss": 0.0, "distillation_loss": 0.05038078874349594, "epoch": 3.98, "learning_rate": 1.9738648709452336e-05, "loss": 0.0527, "step": 4186, "task_loss": 0.07359597086906433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872670866803653, "compression_loss": 0.0, "distillation_loss": 0.09490829706192017, "epoch": 3.98, "learning_rate": 1.9728232579450543e-05, "loss": 0.1038, "step": 4187, "task_loss": 0.18415942788124084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872760979421113, "compression_loss": 0.0, "distillation_loss": 0.1876422017812729, "epoch": 3.98, "learning_rate": 1.971781740709068e-05, "loss": 0.1938, "step": 4188, "task_loss": 0.24940979480743408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872851049512563, "compression_loss": 0.0, "distillation_loss": 0.021935122087597847, "epoch": 3.98, "learning_rate": 1.970740319426474e-05, "loss": 0.0206, "step": 4189, "task_loss": 0.008345887064933777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7872941077088038, "compression_loss": 0.0, "distillation_loss": 0.07562540471553802, "epoch": 3.98, "learning_rate": 1.9696989942864488e-05, "loss": 0.0898, "step": 4190, "task_loss": 0.21687836945056915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873031062157578, "compression_loss": 0.0, "distillation_loss": 0.06813269853591919, "epoch": 3.98, "learning_rate": 1.9686577654781546e-05, "loss": 0.0756, "step": 4191, "task_loss": 0.14326757192611694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873121004731217, "compression_loss": 0.0, "distillation_loss": 0.034589093178510666, "epoch": 3.98, "learning_rate": 1.967616633190737e-05, "loss": 0.0422, "step": 4192, "task_loss": 0.11064015328884125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873210904818994, "compression_loss": 0.0, "distillation_loss": 0.020227717235684395, "epoch": 3.98, "learning_rate": 1.966575597613322e-05, "loss": 0.0186, "step": 4193, "task_loss": 0.004256442189216614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873300762430945, "compression_loss": 0.0, "distillation_loss": 0.10587961226701736, "epoch": 3.98, "learning_rate": 1.9655346589350194e-05, "loss": 0.1102, "step": 4194, "task_loss": 0.14888451993465424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873390577577105, "compression_loss": 0.0, "distillation_loss": 0.08755764365196228, "epoch": 3.98, "learning_rate": 1.964493817344919e-05, "loss": 0.1018, "step": 4195, "task_loss": 0.22955255210399628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873480350267514, "compression_loss": 0.0, "distillation_loss": 0.16795973479747772, "epoch": 3.98, "learning_rate": 1.9634530730320967e-05, "loss": 0.1533, "step": 4196, "task_loss": 0.02090776339173317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873570080512207, "compression_loss": 0.0, "distillation_loss": 0.1484326869249344, "epoch": 3.99, "learning_rate": 1.9624124261856068e-05, "loss": 0.1569, "step": 4197, "task_loss": 0.23264384269714355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873659768321221, "compression_loss": 0.0, "distillation_loss": 0.07327691465616226, "epoch": 3.99, "learning_rate": 1.961371876994489e-05, "loss": 0.0931, "step": 4198, "task_loss": 0.2711363434791565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873749413704593, "compression_loss": 0.0, "distillation_loss": 0.04226084426045418, "epoch": 3.99, "learning_rate": 1.9603314256477644e-05, "loss": 0.0453, "step": 4199, "task_loss": 0.0727810189127922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873839016672359, "compression_loss": 0.0, "distillation_loss": 0.1644660234451294, "epoch": 3.99, "learning_rate": 1.9592910723344335e-05, "loss": 0.1607, "step": 4200, "task_loss": 0.12694597244262695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7873928577234557, "compression_loss": 0.0, "distillation_loss": 0.145688995718956, "epoch": 3.99, "learning_rate": 1.958250817243485e-05, "loss": 0.1528, "step": 4201, "task_loss": 0.2163536101579666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874018095401224, "compression_loss": 0.0, "distillation_loss": 0.04844709113240242, "epoch": 3.99, "learning_rate": 1.9572106605638842e-05, "loss": 0.0536, "step": 4202, "task_loss": 0.09967800229787827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874107571182396, "compression_loss": 0.0, "distillation_loss": 0.019720932468771935, "epoch": 3.99, "learning_rate": 1.956170602484582e-05, "loss": 0.0182, "step": 4203, "task_loss": 0.004351753741502762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874197004588109, "compression_loss": 0.0, "distillation_loss": 0.10592324286699295, "epoch": 3.99, "learning_rate": 1.955130643194508e-05, "loss": 0.1063, "step": 4204, "task_loss": 0.10958631336688995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874286395628401, "compression_loss": 0.0, "distillation_loss": 0.05713961645960808, "epoch": 3.99, "learning_rate": 1.9540907828825768e-05, "loss": 0.0603, "step": 4205, "task_loss": 0.08864769339561462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.787437574431331, "compression_loss": 0.0, "distillation_loss": 0.0976656973361969, "epoch": 3.99, "learning_rate": 1.9530510217376843e-05, "loss": 0.1037, "step": 4206, "task_loss": 0.15841828286647797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874465050652871, "compression_loss": 0.0, "distillation_loss": 0.13762739300727844, "epoch": 4.0, "learning_rate": 1.952011359948708e-05, "loss": 0.1429, "step": 4207, "task_loss": 0.1902618259191513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874554314657122, "compression_loss": 0.0, "distillation_loss": 0.047601085156202316, "epoch": 4.0, "learning_rate": 1.9509717977045068e-05, "loss": 0.0439, "step": 4208, "task_loss": 0.01073162630200386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874643536336098, "compression_loss": 0.0, "distillation_loss": 0.056678276509046555, "epoch": 4.0, "learning_rate": 1.949932335193922e-05, "loss": 0.0562, "step": 4209, "task_loss": 0.052305128425359726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874732715699837, "compression_loss": 0.0, "distillation_loss": 0.18532370030879974, "epoch": 4.0, "learning_rate": 1.948892972605779e-05, "loss": 0.1904, "step": 4210, "task_loss": 0.2363312840461731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874821852758376, "compression_loss": 0.0, "distillation_loss": 0.1466386467218399, "epoch": 4.0, "learning_rate": 1.9478537101288814e-05, "loss": 0.1392, "step": 4211, "task_loss": 0.07213930040597916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6058685241936025, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7755859339922954, "compression/magnitude_sparsity/target_sparsity_level": 0.7874910947521752, "compression_loss": 0.0, "distillation_loss": 0.078835628926754, "epoch": 4.0, "learning_rate": 1.946814547952016e-05, "loss": 0.0736, "step": 4212, "task_loss": 0.02693953923881054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875000000000001, "compression_loss": 0.0, "distillation_loss": 0.4135439693927765, "epoch": 4.0, "learning_rate": 1.945775486263953e-05, "loss": 0.3929, "step": 4213, "task_loss": 0.20665842294692993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787508901020316, "compression_loss": 0.0, "distillation_loss": 0.32134756445884705, "epoch": 4.0, "learning_rate": 1.9447365252534414e-05, "loss": 0.2996, "step": 4214, "task_loss": 0.10436099767684937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875177978141266, "compression_loss": 0.0, "distillation_loss": 0.3447112441062927, "epoch": 4.0, "learning_rate": 1.9436976651092144e-05, "loss": 0.3183, "step": 4215, "task_loss": 0.08028891682624817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875266903824356, "compression_loss": 0.0, "distillation_loss": 0.2967658042907715, "epoch": 4.0, "learning_rate": 1.942658906019986e-05, "loss": 0.2787, "step": 4216, "task_loss": 0.11631828546524048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875355787262467, "compression_loss": 0.0, "distillation_loss": 0.3276255130767822, "epoch": 4.0, "learning_rate": 1.9416202481744504e-05, "loss": 0.3027, "step": 4217, "task_loss": 0.07845309376716614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875444628465635, "compression_loss": 0.0, "distillation_loss": 0.3934395909309387, "epoch": 4.01, "learning_rate": 1.940581691761287e-05, "loss": 0.3804, "step": 4218, "task_loss": 0.2632703185081482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875533427443898, "compression_loss": 0.0, "distillation_loss": 0.3027340769767761, "epoch": 4.01, "learning_rate": 1.9395432369691526e-05, "loss": 0.2816, "step": 4219, "task_loss": 0.09113991260528564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875622184207293, "compression_loss": 0.0, "distillation_loss": 0.30732953548431396, "epoch": 4.01, "learning_rate": 1.9385048839866896e-05, "loss": 0.2857, "step": 4220, "task_loss": 0.09076324105262756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875710898765854, "compression_loss": 0.0, "distillation_loss": 0.27244919538497925, "epoch": 4.01, "learning_rate": 1.9374666330025178e-05, "loss": 0.26, "step": 4221, "task_loss": 0.14839491248130798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875799571129621, "compression_loss": 0.0, "distillation_loss": 0.13425913453102112, "epoch": 4.01, "learning_rate": 1.9364284842052414e-05, "loss": 0.1228, "step": 4222, "task_loss": 0.019507169723510742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787588820130863, "compression_loss": 0.0, "distillation_loss": 0.17389854788780212, "epoch": 4.01, "learning_rate": 1.9353904377834454e-05, "loss": 0.1588, "step": 4223, "task_loss": 0.0224696546792984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7875976789312916, "compression_loss": 0.0, "distillation_loss": 0.16582781076431274, "epoch": 4.01, "learning_rate": 1.934352493925695e-05, "loss": 0.1547, "step": 4224, "task_loss": 0.05480484664440155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876065335152518, "compression_loss": 0.0, "distillation_loss": 0.09588154405355453, "epoch": 4.01, "learning_rate": 1.933314652820539e-05, "loss": 0.0882, "step": 4225, "task_loss": 0.019207999110221863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876153838837472, "compression_loss": 0.0, "distillation_loss": 0.11465960741043091, "epoch": 4.01, "learning_rate": 1.932276914656504e-05, "loss": 0.106, "step": 4226, "task_loss": 0.028103739023208618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876242300377816, "compression_loss": 0.0, "distillation_loss": 0.22152969241142273, "epoch": 4.01, "learning_rate": 1.9312392796221033e-05, "loss": 0.217, "step": 4227, "task_loss": 0.1767098605632782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876330719783584, "compression_loss": 0.0, "distillation_loss": 0.17856980860233307, "epoch": 4.02, "learning_rate": 1.9302017479058256e-05, "loss": 0.1757, "step": 4228, "task_loss": 0.149837926030159 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876419097064816, "compression_loss": 0.0, "distillation_loss": 0.08909793198108673, "epoch": 4.02, "learning_rate": 1.929164319696145e-05, "loss": 0.09, "step": 4229, "task_loss": 0.09795405715703964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876507432231546, "compression_loss": 0.0, "distillation_loss": 0.11065828800201416, "epoch": 4.02, "learning_rate": 1.9281269951815154e-05, "loss": 0.1068, "step": 4230, "task_loss": 0.07157891988754272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876595725293813, "compression_loss": 0.0, "distillation_loss": 0.1124470978975296, "epoch": 4.02, "learning_rate": 1.9270897745503706e-05, "loss": 0.107, "step": 4231, "task_loss": 0.05780310928821564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876683976261654, "compression_loss": 0.0, "distillation_loss": 0.09072036296129227, "epoch": 4.02, "learning_rate": 1.9260526579911283e-05, "loss": 0.0887, "step": 4232, "task_loss": 0.07040438801050186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7876772185145103, "compression_loss": 0.0, "distillation_loss": 0.1262628436088562, "epoch": 4.02, "learning_rate": 1.9250156456921837e-05, "loss": 0.1201, "step": 4233, "task_loss": 0.06450807303190231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.78768603519542, "compression_loss": 0.0, "distillation_loss": 0.062325261533260345, "epoch": 4.02, "learning_rate": 1.9239787378419165e-05, "loss": 0.0646, "step": 4234, "task_loss": 0.08547300100326538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787694847669898, "compression_loss": 0.0, "distillation_loss": 0.054038092494010925, "epoch": 4.02, "learning_rate": 1.9229419346286853e-05, "loss": 0.0545, "step": 4235, "task_loss": 0.05840130150318146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787703655938948, "compression_loss": 0.0, "distillation_loss": 0.11831286549568176, "epoch": 4.02, "learning_rate": 1.9219052362408314e-05, "loss": 0.1204, "step": 4236, "task_loss": 0.13891497254371643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877124600035738, "compression_loss": 0.0, "distillation_loss": 0.13940417766571045, "epoch": 4.02, "learning_rate": 1.920868642866676e-05, "loss": 0.1331, "step": 4237, "task_loss": 0.07673609256744385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877212598647789, "compression_loss": 0.0, "distillation_loss": 0.11010223627090454, "epoch": 4.02, "learning_rate": 1.91983215469452e-05, "loss": 0.1152, "step": 4238, "task_loss": 0.16106702387332916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877300555235671, "compression_loss": 0.0, "distillation_loss": 0.18333761394023895, "epoch": 4.03, "learning_rate": 1.918795771912648e-05, "loss": 0.1795, "step": 4239, "task_loss": 0.14447200298309326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877388469809421, "compression_loss": 0.0, "distillation_loss": 0.08234288543462753, "epoch": 4.03, "learning_rate": 1.917759494709322e-05, "loss": 0.0861, "step": 4240, "task_loss": 0.12013165652751923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877476342379075, "compression_loss": 0.0, "distillation_loss": 0.134935200214386, "epoch": 4.03, "learning_rate": 1.9167233232727885e-05, "loss": 0.1334, "step": 4241, "task_loss": 0.11945871263742447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787756417295467, "compression_loss": 0.0, "distillation_loss": 0.053517088294029236, "epoch": 4.03, "learning_rate": 1.915687257791273e-05, "loss": 0.0582, "step": 4242, "task_loss": 0.10031777620315552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877651961546244, "compression_loss": 0.0, "distillation_loss": 0.1066487580537796, "epoch": 4.03, "learning_rate": 1.9146512984529793e-05, "loss": 0.1059, "step": 4243, "task_loss": 0.09886283427476883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877739708163831, "compression_loss": 0.0, "distillation_loss": 0.23031002283096313, "epoch": 4.03, "learning_rate": 1.913615445446098e-05, "loss": 0.2218, "step": 4244, "task_loss": 0.14517052471637726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7877827412817472, "compression_loss": 0.0, "distillation_loss": 0.2954586148262024, "epoch": 4.03, "learning_rate": 1.9125796989587947e-05, "loss": 0.3012, "step": 4245, "task_loss": 0.35336050391197205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.78779150755172, "compression_loss": 0.0, "distillation_loss": 0.0650944635272026, "epoch": 4.03, "learning_rate": 1.9115440591792182e-05, "loss": 0.063, "step": 4246, "task_loss": 0.044294241815805435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878002696273054, "compression_loss": 0.0, "distillation_loss": 0.070266954600811, "epoch": 4.03, "learning_rate": 1.9105085262954975e-05, "loss": 0.0739, "step": 4247, "task_loss": 0.10610578209161758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787809027509507, "compression_loss": 0.0, "distillation_loss": 0.09126845002174377, "epoch": 4.03, "learning_rate": 1.9094731004957416e-05, "loss": 0.0977, "step": 4248, "task_loss": 0.15584436058998108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878177811993284, "compression_loss": 0.0, "distillation_loss": 0.1306382119655609, "epoch": 4.04, "learning_rate": 1.9084377819680417e-05, "loss": 0.1219, "step": 4249, "task_loss": 0.043007947504520416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878265306977735, "compression_loss": 0.0, "distillation_loss": 0.06510676443576813, "epoch": 4.04, "learning_rate": 1.9074025709004672e-05, "loss": 0.0593, "step": 4250, "task_loss": 0.007219014689326286 }, { "epoch": 4.04, "eval_accuracy": 0.8910550458715596, "eval_loss": 0.53865647315979, "eval_runtime": 17.9464, "eval_samples_per_second": 48.589, "eval_steps_per_second": 6.074, "step": 4250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878352760058459, "compression_loss": 0.0, "distillation_loss": 0.07825392484664917, "epoch": 4.04, "learning_rate": 1.9063674674810696e-05, "loss": 0.0739, "step": 4251, "task_loss": 0.035188619047403336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878440171245491, "compression_loss": 0.0, "distillation_loss": 0.1569930762052536, "epoch": 4.04, "learning_rate": 1.90533247189788e-05, "loss": 0.1545, "step": 4252, "task_loss": 0.1325351744890213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878527540548871, "compression_loss": 0.0, "distillation_loss": 0.0776907354593277, "epoch": 4.04, "learning_rate": 1.9042975843389115e-05, "loss": 0.0752, "step": 4253, "task_loss": 0.05311368405818939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878614867978633, "compression_loss": 0.0, "distillation_loss": 0.02283748984336853, "epoch": 4.04, "learning_rate": 1.903262804992156e-05, "loss": 0.0229, "step": 4254, "task_loss": 0.023530958220362663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878702153544814, "compression_loss": 0.0, "distillation_loss": 0.35799717903137207, "epoch": 4.04, "learning_rate": 1.9022281340455854e-05, "loss": 0.3416, "step": 4255, "task_loss": 0.1938759684562683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878789397257453, "compression_loss": 0.0, "distillation_loss": 0.08360302448272705, "epoch": 4.04, "learning_rate": 1.9011935716871535e-05, "loss": 0.0995, "step": 4256, "task_loss": 0.24218979477882385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878876599126585, "compression_loss": 0.0, "distillation_loss": 0.060659270733594894, "epoch": 4.04, "learning_rate": 1.900159118104793e-05, "loss": 0.0644, "step": 4257, "task_loss": 0.09822718799114227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7878963759162247, "compression_loss": 0.0, "distillation_loss": 0.10400435328483582, "epoch": 4.04, "learning_rate": 1.8991247734864173e-05, "loss": 0.1054, "step": 4258, "task_loss": 0.11802786588668823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879050877374478, "compression_loss": 0.0, "distillation_loss": 0.12243454158306122, "epoch": 4.04, "learning_rate": 1.898090538019921e-05, "loss": 0.1253, "step": 4259, "task_loss": 0.15127399563789368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879137953773312, "compression_loss": 0.0, "distillation_loss": 0.04609953239560127, "epoch": 4.05, "learning_rate": 1.897056411893177e-05, "loss": 0.0505, "step": 4260, "task_loss": 0.08963650465011597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879224988368786, "compression_loss": 0.0, "distillation_loss": 0.02292802557349205, "epoch": 4.05, "learning_rate": 1.896022395294039e-05, "loss": 0.0211, "step": 4261, "task_loss": 0.004292329773306847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879311981170939, "compression_loss": 0.0, "distillation_loss": 0.037770919501781464, "epoch": 4.05, "learning_rate": 1.8949884884103418e-05, "loss": 0.0359, "step": 4262, "task_loss": 0.019271956756711006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879398932189805, "compression_loss": 0.0, "distillation_loss": 0.16756854951381683, "epoch": 4.05, "learning_rate": 1.8939546914299e-05, "loss": 0.171, "step": 4263, "task_loss": 0.20192977786064148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879485841435423, "compression_loss": 0.0, "distillation_loss": 0.07206468284130096, "epoch": 4.05, "learning_rate": 1.892921004540507e-05, "loss": 0.0677, "step": 4264, "task_loss": 0.028736798092722893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879572708917829, "compression_loss": 0.0, "distillation_loss": 0.0855240449309349, "epoch": 4.05, "learning_rate": 1.8918874279299372e-05, "loss": 0.0786, "step": 4265, "task_loss": 0.01638454757630825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.787965953464706, "compression_loss": 0.0, "distillation_loss": 0.06442791223526001, "epoch": 4.05, "learning_rate": 1.8908539617859456e-05, "loss": 0.0701, "step": 4266, "task_loss": 0.12096739560365677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879746318633153, "compression_loss": 0.0, "distillation_loss": 0.07881754636764526, "epoch": 4.05, "learning_rate": 1.8898206062962647e-05, "loss": 0.0732, "step": 4267, "task_loss": 0.022822581231594086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879833060886144, "compression_loss": 0.0, "distillation_loss": 0.09095026552677155, "epoch": 4.05, "learning_rate": 1.88878736164861e-05, "loss": 0.0839, "step": 4268, "task_loss": 0.020265545696020126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7879919761416071, "compression_loss": 0.0, "distillation_loss": 0.19944968819618225, "epoch": 4.05, "learning_rate": 1.8877542280306728e-05, "loss": 0.2095, "step": 4269, "task_loss": 0.2999950647354126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788000642023297, "compression_loss": 0.0, "distillation_loss": 0.08276577293872833, "epoch": 4.06, "learning_rate": 1.8867212056301305e-05, "loss": 0.0792, "step": 4270, "task_loss": 0.04671813175082207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880093037346878, "compression_loss": 0.0, "distillation_loss": 0.21997350454330444, "epoch": 4.06, "learning_rate": 1.8856882946346344e-05, "loss": 0.2087, "step": 4271, "task_loss": 0.10748877376317978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880179612767833, "compression_loss": 0.0, "distillation_loss": 0.14946487545967102, "epoch": 4.06, "learning_rate": 1.8846554952318178e-05, "loss": 0.1507, "step": 4272, "task_loss": 0.1618417650461197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880266146505869, "compression_loss": 0.0, "distillation_loss": 0.25104832649230957, "epoch": 4.06, "learning_rate": 1.8836228076092945e-05, "loss": 0.2645, "step": 4273, "task_loss": 0.3852643370628357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880352638571025, "compression_loss": 0.0, "distillation_loss": 0.01792140305042267, "epoch": 4.06, "learning_rate": 1.8825902319546565e-05, "loss": 0.0165, "step": 4274, "task_loss": 0.0033651478588581085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880439088973338, "compression_loss": 0.0, "distillation_loss": 0.051164254546165466, "epoch": 4.06, "learning_rate": 1.881557768455477e-05, "loss": 0.0474, "step": 4275, "task_loss": 0.013138506561517715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880525497722843, "compression_loss": 0.0, "distillation_loss": 0.1809595227241516, "epoch": 4.06, "learning_rate": 1.8805254172993064e-05, "loss": 0.1699, "step": 4276, "task_loss": 0.07031507790088654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880611864829579, "compression_loss": 0.0, "distillation_loss": 0.09073644876480103, "epoch": 4.06, "learning_rate": 1.879493178673677e-05, "loss": 0.0901, "step": 4277, "task_loss": 0.08396268635988235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880698190303581, "compression_loss": 0.0, "distillation_loss": 0.07403849810361862, "epoch": 4.06, "learning_rate": 1.8784610527661e-05, "loss": 0.0697, "step": 4278, "task_loss": 0.03097301721572876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880784474154886, "compression_loss": 0.0, "distillation_loss": 0.10361620783805847, "epoch": 4.06, "learning_rate": 1.8774290397640664e-05, "loss": 0.1067, "step": 4279, "task_loss": 0.13399073481559753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880870716393533, "compression_loss": 0.0, "distillation_loss": 0.04671156406402588, "epoch": 4.06, "learning_rate": 1.876397139855047e-05, "loss": 0.0516, "step": 4280, "task_loss": 0.09573487192392349 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7880956917029556, "compression_loss": 0.0, "distillation_loss": 0.06411126255989075, "epoch": 4.07, "learning_rate": 1.8753653532264894e-05, "loss": 0.0698, "step": 4281, "task_loss": 0.12079112231731415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881043076072994, "compression_loss": 0.0, "distillation_loss": 0.10022042691707611, "epoch": 4.07, "learning_rate": 1.8743336800658245e-05, "loss": 0.1016, "step": 4282, "task_loss": 0.11449723690748215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881129193533882, "compression_loss": 0.0, "distillation_loss": 0.13063356280326843, "epoch": 4.07, "learning_rate": 1.8733021205604596e-05, "loss": 0.129, "step": 4283, "task_loss": 0.11380869895219803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881215269422258, "compression_loss": 0.0, "distillation_loss": 0.171952024102211, "epoch": 4.07, "learning_rate": 1.872270674897782e-05, "loss": 0.1669, "step": 4284, "task_loss": 0.12174376845359802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881301303748159, "compression_loss": 0.0, "distillation_loss": 0.02115825191140175, "epoch": 4.07, "learning_rate": 1.8712393432651603e-05, "loss": 0.0195, "step": 4285, "task_loss": 0.0041760653257369995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881387296521621, "compression_loss": 0.0, "distillation_loss": 0.03141398727893829, "epoch": 4.07, "learning_rate": 1.87020812584994e-05, "loss": 0.0287, "step": 4286, "task_loss": 0.0046629346907138824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881473247752682, "compression_loss": 0.0, "distillation_loss": 0.10433746874332428, "epoch": 4.07, "learning_rate": 1.8691770228394456e-05, "loss": 0.1159, "step": 4287, "task_loss": 0.21988573670387268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881559157451377, "compression_loss": 0.0, "distillation_loss": 0.08133610337972641, "epoch": 4.07, "learning_rate": 1.868146034420984e-05, "loss": 0.0763, "step": 4288, "task_loss": 0.03072592243552208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881645025627744, "compression_loss": 0.0, "distillation_loss": 0.11363761126995087, "epoch": 4.07, "learning_rate": 1.8671151607818382e-05, "loss": 0.1087, "step": 4289, "task_loss": 0.06467755138874054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788173085229182, "compression_loss": 0.0, "distillation_loss": 0.051334887742996216, "epoch": 4.07, "learning_rate": 1.8660844021092716e-05, "loss": 0.0478, "step": 4290, "task_loss": 0.016054822131991386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788181663745364, "compression_loss": 0.0, "distillation_loss": 0.08099980652332306, "epoch": 4.08, "learning_rate": 1.8650537585905258e-05, "loss": 0.0847, "step": 4291, "task_loss": 0.11772287636995316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881902381123244, "compression_loss": 0.0, "distillation_loss": 0.015967125073075294, "epoch": 4.08, "learning_rate": 1.8640232304128236e-05, "loss": 0.0146, "step": 4292, "task_loss": 0.002349233254790306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7881988083310666, "compression_loss": 0.0, "distillation_loss": 0.03628503531217575, "epoch": 4.08, "learning_rate": 1.8629928177633637e-05, "loss": 0.0488, "step": 4293, "task_loss": 0.16185718774795532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882073744025945, "compression_loss": 0.0, "distillation_loss": 0.05933556705713272, "epoch": 4.08, "learning_rate": 1.8619625208293268e-05, "loss": 0.0568, "step": 4294, "task_loss": 0.03381485491991043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882159363279116, "compression_loss": 0.0, "distillation_loss": 0.16856390237808228, "epoch": 4.08, "learning_rate": 1.86093233979787e-05, "loss": 0.1643, "step": 4295, "task_loss": 0.12632951140403748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882244941080216, "compression_loss": 0.0, "distillation_loss": 0.07362917810678482, "epoch": 4.08, "learning_rate": 1.8599022748561325e-05, "loss": 0.075, "step": 4296, "task_loss": 0.08769040554761887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882330477439283, "compression_loss": 0.0, "distillation_loss": 0.03499433398246765, "epoch": 4.08, "learning_rate": 1.8588723261912288e-05, "loss": 0.0327, "step": 4297, "task_loss": 0.012281343340873718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882415972366354, "compression_loss": 0.0, "distillation_loss": 0.11919771134853363, "epoch": 4.08, "learning_rate": 1.857842493990255e-05, "loss": 0.1097, "step": 4298, "task_loss": 0.023989982903003693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882501425871464, "compression_loss": 0.0, "distillation_loss": 0.11720117926597595, "epoch": 4.08, "learning_rate": 1.856812778440285e-05, "loss": 0.1113, "step": 4299, "task_loss": 0.057778194546699524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882586837964651, "compression_loss": 0.0, "distillation_loss": 0.24679766595363617, "epoch": 4.08, "learning_rate": 1.8557831797283716e-05, "loss": 0.2454, "step": 4300, "task_loss": 0.23254123330116272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882672208655952, "compression_loss": 0.0, "distillation_loss": 0.05673684924840927, "epoch": 4.08, "learning_rate": 1.8547536980415452e-05, "loss": 0.0526, "step": 4301, "task_loss": 0.014921434223651886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882757537955404, "compression_loss": 0.0, "distillation_loss": 0.06865353882312775, "epoch": 4.09, "learning_rate": 1.8537243335668187e-05, "loss": 0.0635, "step": 4302, "task_loss": 0.01695919781923294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882842825873042, "compression_loss": 0.0, "distillation_loss": 0.08616451919078827, "epoch": 4.09, "learning_rate": 1.8526950864911784e-05, "loss": 0.0925, "step": 4303, "task_loss": 0.14940661191940308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7882928072418904, "compression_loss": 0.0, "distillation_loss": 0.0653180330991745, "epoch": 4.09, "learning_rate": 1.8516659570015924e-05, "loss": 0.0677, "step": 4304, "task_loss": 0.08950569480657578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883013277603028, "compression_loss": 0.0, "distillation_loss": 0.01897302269935608, "epoch": 4.09, "learning_rate": 1.8506369452850087e-05, "loss": 0.0177, "step": 4305, "task_loss": 0.0065006837248802185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788309844143545, "compression_loss": 0.0, "distillation_loss": 0.0725211426615715, "epoch": 4.09, "learning_rate": 1.8496080515283514e-05, "loss": 0.0802, "step": 4306, "task_loss": 0.14979489147663116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883183563926206, "compression_loss": 0.0, "distillation_loss": 0.03266327083110809, "epoch": 4.09, "learning_rate": 1.8485792759185232e-05, "loss": 0.0298, "step": 4307, "task_loss": 0.0037787500768899918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883268645085333, "compression_loss": 0.0, "distillation_loss": 0.02446456253528595, "epoch": 4.09, "learning_rate": 1.8475506186424074e-05, "loss": 0.0224, "step": 4308, "task_loss": 0.004015939310193062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883353684922869, "compression_loss": 0.0, "distillation_loss": 0.029062699526548386, "epoch": 4.09, "learning_rate": 1.846522079886864e-05, "loss": 0.0269, "step": 4309, "task_loss": 0.007744431495666504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788343868344885, "compression_loss": 0.0, "distillation_loss": 0.026423152536153793, "epoch": 4.09, "learning_rate": 1.8454936598387317e-05, "loss": 0.0375, "step": 4310, "task_loss": 0.13689905405044556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883523640673312, "compression_loss": 0.0, "distillation_loss": 0.18082647025585175, "epoch": 4.09, "learning_rate": 1.8444653586848286e-05, "loss": 0.1714, "step": 4311, "task_loss": 0.08704258501529694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883608556606293, "compression_loss": 0.0, "distillation_loss": 0.06727072596549988, "epoch": 4.09, "learning_rate": 1.8434371766119496e-05, "loss": 0.0641, "step": 4312, "task_loss": 0.03516186401247978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788369343125783, "compression_loss": 0.0, "distillation_loss": 0.02708693966269493, "epoch": 4.1, "learning_rate": 1.8424091138068692e-05, "loss": 0.0254, "step": 4313, "task_loss": 0.009996037930250168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788377826463796, "compression_loss": 0.0, "distillation_loss": 0.036099981516599655, "epoch": 4.1, "learning_rate": 1.8413811704563405e-05, "loss": 0.0384, "step": 4314, "task_loss": 0.059546999633312225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883863056756718, "compression_loss": 0.0, "distillation_loss": 0.048894353210926056, "epoch": 4.1, "learning_rate": 1.8403533467470946e-05, "loss": 0.0634, "step": 4315, "task_loss": 0.1943952441215515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7883947807624142, "compression_loss": 0.0, "distillation_loss": 0.1497052013874054, "epoch": 4.1, "learning_rate": 1.8393256428658403e-05, "loss": 0.1447, "step": 4316, "task_loss": 0.0995352566242218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884032517250269, "compression_loss": 0.0, "distillation_loss": 0.07365826517343521, "epoch": 4.1, "learning_rate": 1.8382980589992643e-05, "loss": 0.0695, "step": 4317, "task_loss": 0.031793296337127686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884117185645135, "compression_loss": 0.0, "distillation_loss": 0.058896757662296295, "epoch": 4.1, "learning_rate": 1.8372705953340337e-05, "loss": 0.0537, "step": 4318, "task_loss": 0.006742917001247406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884201812818779, "compression_loss": 0.0, "distillation_loss": 0.057655736804008484, "epoch": 4.1, "learning_rate": 1.8362432520567903e-05, "loss": 0.0562, "step": 4319, "task_loss": 0.04272696375846863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884286398781235, "compression_loss": 0.0, "distillation_loss": 0.06040318310260773, "epoch": 4.1, "learning_rate": 1.8352160293541566e-05, "loss": 0.0561, "step": 4320, "task_loss": 0.017850998789072037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884370943542541, "compression_loss": 0.0, "distillation_loss": 0.028085466474294662, "epoch": 4.1, "learning_rate": 1.834188927412732e-05, "loss": 0.0255, "step": 4321, "task_loss": 0.002726750448346138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884455447112735, "compression_loss": 0.0, "distillation_loss": 0.014654599130153656, "epoch": 4.1, "learning_rate": 1.833161946419097e-05, "loss": 0.0203, "step": 4322, "task_loss": 0.07066480070352554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884539909501852, "compression_loss": 0.0, "distillation_loss": 0.14747686684131622, "epoch": 4.11, "learning_rate": 1.8321350865598057e-05, "loss": 0.1489, "step": 4323, "task_loss": 0.16213619709014893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884624330719929, "compression_loss": 0.0, "distillation_loss": 0.030887076631188393, "epoch": 4.11, "learning_rate": 1.831108348021392e-05, "loss": 0.0283, "step": 4324, "task_loss": 0.005010116845369339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884708710777004, "compression_loss": 0.0, "distillation_loss": 0.056699033826589584, "epoch": 4.11, "learning_rate": 1.8300817309903686e-05, "loss": 0.0685, "step": 4325, "task_loss": 0.17428170144557953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884793049683114, "compression_loss": 0.0, "distillation_loss": 0.03099614754319191, "epoch": 4.11, "learning_rate": 1.8290552356532247e-05, "loss": 0.0287, "step": 4326, "task_loss": 0.008489016443490982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884877347448294, "compression_loss": 0.0, "distillation_loss": 0.04078204557299614, "epoch": 4.11, "learning_rate": 1.8280288621964288e-05, "loss": 0.0456, "step": 4327, "task_loss": 0.08923979848623276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7884961604082582, "compression_loss": 0.0, "distillation_loss": 0.0556565560400486, "epoch": 4.11, "learning_rate": 1.827002610806427e-05, "loss": 0.0541, "step": 4328, "task_loss": 0.03984564542770386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885045819596015, "compression_loss": 0.0, "distillation_loss": 0.06724396347999573, "epoch": 4.11, "learning_rate": 1.825976481669641e-05, "loss": 0.0626, "step": 4329, "task_loss": 0.02046894282102585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885129993998629, "compression_loss": 0.0, "distillation_loss": 0.034551121294498444, "epoch": 4.11, "learning_rate": 1.824950474972473e-05, "loss": 0.0469, "step": 4330, "task_loss": 0.15822812914848328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885214127300461, "compression_loss": 0.0, "distillation_loss": 0.03939036652445793, "epoch": 4.11, "learning_rate": 1.823924590901303e-05, "loss": 0.0436, "step": 4331, "task_loss": 0.08171267807483673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885298219511548, "compression_loss": 0.0, "distillation_loss": 0.046313732862472534, "epoch": 4.11, "learning_rate": 1.8228988296424877e-05, "loss": 0.0426, "step": 4332, "task_loss": 0.009343873709440231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885382270641927, "compression_loss": 0.0, "distillation_loss": 0.16361156105995178, "epoch": 4.11, "learning_rate": 1.82187319138236e-05, "loss": 0.1613, "step": 4333, "task_loss": 0.14066770672798157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885466280701636, "compression_loss": 0.0, "distillation_loss": 0.12138652801513672, "epoch": 4.12, "learning_rate": 1.8208476763072332e-05, "loss": 0.1152, "step": 4334, "task_loss": 0.05983586981892586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788555024970071, "compression_loss": 0.0, "distillation_loss": 0.05892729386687279, "epoch": 4.12, "learning_rate": 1.8198222846033975e-05, "loss": 0.0612, "step": 4335, "task_loss": 0.0814908966422081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885634177649186, "compression_loss": 0.0, "distillation_loss": 0.01657470129430294, "epoch": 4.12, "learning_rate": 1.8187970164571187e-05, "loss": 0.0158, "step": 4336, "task_loss": 0.008594617247581482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885718064557101, "compression_loss": 0.0, "distillation_loss": 0.16905143857002258, "epoch": 4.12, "learning_rate": 1.817771872054643e-05, "loss": 0.16, "step": 4337, "task_loss": 0.07822901755571365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885801910434493, "compression_loss": 0.0, "distillation_loss": 0.12039444595575333, "epoch": 4.12, "learning_rate": 1.8167468515821924e-05, "loss": 0.1294, "step": 4338, "task_loss": 0.210740864276886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885885715291397, "compression_loss": 0.0, "distillation_loss": 0.320880651473999, "epoch": 4.12, "learning_rate": 1.815721955225966e-05, "loss": 0.3213, "step": 4339, "task_loss": 0.32472920417785645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7885969479137851, "compression_loss": 0.0, "distillation_loss": 0.11701447516679764, "epoch": 4.12, "learning_rate": 1.8146971831721426e-05, "loss": 0.1076, "step": 4340, "task_loss": 0.023150721564888954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886053201983891, "compression_loss": 0.0, "distillation_loss": 0.05050637945532799, "epoch": 4.12, "learning_rate": 1.8136725356068762e-05, "loss": 0.0468, "step": 4341, "task_loss": 0.013836957514286041 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886136883839555, "compression_loss": 0.0, "distillation_loss": 0.14656773209571838, "epoch": 4.12, "learning_rate": 1.8126480127163e-05, "loss": 0.1385, "step": 4342, "task_loss": 0.06583775579929352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788622052471488, "compression_loss": 0.0, "distillation_loss": 0.17784056067466736, "epoch": 4.12, "learning_rate": 1.8116236146865213e-05, "loss": 0.1849, "step": 4343, "task_loss": 0.24860072135925293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886304124619901, "compression_loss": 0.0, "distillation_loss": 0.03818412870168686, "epoch": 4.13, "learning_rate": 1.810599341703629e-05, "loss": 0.0394, "step": 4344, "task_loss": 0.0502682588994503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886387683564655, "compression_loss": 0.0, "distillation_loss": 0.1047634482383728, "epoch": 4.13, "learning_rate": 1.8095751939536866e-05, "loss": 0.0986, "step": 4345, "task_loss": 0.04341891035437584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788647120155918, "compression_loss": 0.0, "distillation_loss": 0.26263827085494995, "epoch": 4.13, "learning_rate": 1.8085511716227345e-05, "loss": 0.255, "step": 4346, "task_loss": 0.18654467165470123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886554678613513, "compression_loss": 0.0, "distillation_loss": 0.09224887192249298, "epoch": 4.13, "learning_rate": 1.807527274896792e-05, "loss": 0.0892, "step": 4347, "task_loss": 0.06142377480864525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788663811473769, "compression_loss": 0.0, "distillation_loss": 0.12528349459171295, "epoch": 4.13, "learning_rate": 1.8065035039618556e-05, "loss": 0.1222, "step": 4348, "task_loss": 0.09476065635681152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886721509941748, "compression_loss": 0.0, "distillation_loss": 0.04307865723967552, "epoch": 4.13, "learning_rate": 1.8054798590038984e-05, "loss": 0.0394, "step": 4349, "task_loss": 0.006099509075284004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886804864235724, "compression_loss": 0.0, "distillation_loss": 0.046839505434036255, "epoch": 4.13, "learning_rate": 1.8044563402088684e-05, "loss": 0.0433, "step": 4350, "task_loss": 0.01109330728650093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886888177629654, "compression_loss": 0.0, "distillation_loss": 0.03697451949119568, "epoch": 4.13, "learning_rate": 1.8034329477626945e-05, "loss": 0.0493, "step": 4351, "task_loss": 0.16054922342300415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7886971450133576, "compression_loss": 0.0, "distillation_loss": 0.066026471555233, "epoch": 4.13, "learning_rate": 1.8024096818512807e-05, "loss": 0.0672, "step": 4352, "task_loss": 0.07799084484577179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887054681757527, "compression_loss": 0.0, "distillation_loss": 0.047900401055812836, "epoch": 4.13, "learning_rate": 1.8013865426605076e-05, "loss": 0.0578, "step": 4353, "task_loss": 0.14682576060295105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887137872511543, "compression_loss": 0.0, "distillation_loss": 0.04553601145744324, "epoch": 4.13, "learning_rate": 1.8003635303762336e-05, "loss": 0.0483, "step": 4354, "task_loss": 0.0731407031416893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788722102240566, "compression_loss": 0.0, "distillation_loss": 0.07538627833127975, "epoch": 4.14, "learning_rate": 1.7993406451842935e-05, "loss": 0.0774, "step": 4355, "task_loss": 0.0952514261007309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887304131449917, "compression_loss": 0.0, "distillation_loss": 0.2628774642944336, "epoch": 4.14, "learning_rate": 1.7983178872704992e-05, "loss": 0.2549, "step": 4356, "task_loss": 0.1826838105916977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887387199654349, "compression_loss": 0.0, "distillation_loss": 0.10139012336730957, "epoch": 4.14, "learning_rate": 1.7972952568206402e-05, "loss": 0.1041, "step": 4357, "task_loss": 0.12895506620407104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887470227028993, "compression_loss": 0.0, "distillation_loss": 0.19154085218906403, "epoch": 4.14, "learning_rate": 1.7962727540204827e-05, "loss": 0.1773, "step": 4358, "task_loss": 0.04891242831945419 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887553213583888, "compression_loss": 0.0, "distillation_loss": 0.09380476176738739, "epoch": 4.14, "learning_rate": 1.7952503790557686e-05, "loss": 0.097, "step": 4359, "task_loss": 0.1261102557182312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887636159329067, "compression_loss": 0.0, "distillation_loss": 0.10539199411869049, "epoch": 4.14, "learning_rate": 1.7942281321122168e-05, "loss": 0.1145, "step": 4360, "task_loss": 0.19654574990272522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788771906427457, "compression_loss": 0.0, "distillation_loss": 0.021082423627376556, "epoch": 4.14, "learning_rate": 1.7932060133755245e-05, "loss": 0.0256, "step": 4361, "task_loss": 0.06635300070047379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887801928430432, "compression_loss": 0.0, "distillation_loss": 0.020916704088449478, "epoch": 4.14, "learning_rate": 1.792184023031363e-05, "loss": 0.0241, "step": 4362, "task_loss": 0.05260460451245308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887884751806692, "compression_loss": 0.0, "distillation_loss": 0.13232557475566864, "epoch": 4.14, "learning_rate": 1.7911621612653832e-05, "loss": 0.1251, "step": 4363, "task_loss": 0.060476090759038925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7887967534413384, "compression_loss": 0.0, "distillation_loss": 0.1687774807214737, "epoch": 4.14, "learning_rate": 1.7901404282632105e-05, "loss": 0.156, "step": 4364, "task_loss": 0.04108697548508644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888050276260546, "compression_loss": 0.0, "distillation_loss": 0.052245475351810455, "epoch": 4.15, "learning_rate": 1.7891188242104466e-05, "loss": 0.0564, "step": 4365, "task_loss": 0.09406599402427673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888132977358217, "compression_loss": 0.0, "distillation_loss": 0.02484312653541565, "epoch": 4.15, "learning_rate": 1.7880973492926734e-05, "loss": 0.0229, "step": 4366, "task_loss": 0.005430508404970169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788821563771643, "compression_loss": 0.0, "distillation_loss": 0.06918057799339294, "epoch": 4.15, "learning_rate": 1.7870760036954444e-05, "loss": 0.0648, "step": 4367, "task_loss": 0.02556915022432804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888298257345224, "compression_loss": 0.0, "distillation_loss": 0.19346660375595093, "epoch": 4.15, "learning_rate": 1.786054787604294e-05, "loss": 0.1922, "step": 4368, "task_loss": 0.18102970719337463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888380836254636, "compression_loss": 0.0, "distillation_loss": 0.1463518738746643, "epoch": 4.15, "learning_rate": 1.7850337012047287e-05, "loss": 0.155, "step": 4369, "task_loss": 0.23283621668815613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888463374454703, "compression_loss": 0.0, "distillation_loss": 0.06679415702819824, "epoch": 4.15, "learning_rate": 1.784012744682235e-05, "loss": 0.0708, "step": 4370, "task_loss": 0.10708191245794296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888545871955459, "compression_loss": 0.0, "distillation_loss": 0.015153437852859497, "epoch": 4.15, "learning_rate": 1.7829919182222752e-05, "loss": 0.0192, "step": 4371, "task_loss": 0.0555480532348156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888628328766945, "compression_loss": 0.0, "distillation_loss": 0.04128699749708176, "epoch": 4.15, "learning_rate": 1.7819712220102857e-05, "loss": 0.0446, "step": 4372, "task_loss": 0.07435785233974457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888710744899194, "compression_loss": 0.0, "distillation_loss": 0.07817722856998444, "epoch": 4.15, "learning_rate": 1.7809506562316818e-05, "loss": 0.08, "step": 4373, "task_loss": 0.09686526656150818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888793120362246, "compression_loss": 0.0, "distillation_loss": 0.058174848556518555, "epoch": 4.15, "learning_rate": 1.7799302210718544e-05, "loss": 0.0739, "step": 4374, "task_loss": 0.21509107947349548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888875455166136, "compression_loss": 0.0, "distillation_loss": 0.02214631997048855, "epoch": 4.15, "learning_rate": 1.7789099167161704e-05, "loss": 0.0367, "step": 4375, "task_loss": 0.1679784506559372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7888957749320901, "compression_loss": 0.0, "distillation_loss": 0.03224210441112518, "epoch": 4.16, "learning_rate": 1.777889743349973e-05, "loss": 0.0327, "step": 4376, "task_loss": 0.036665864288806915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889040002836578, "compression_loss": 0.0, "distillation_loss": 0.04349253326654434, "epoch": 4.16, "learning_rate": 1.776869701158581e-05, "loss": 0.041, "step": 4377, "task_loss": 0.01853703148663044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889122215723204, "compression_loss": 0.0, "distillation_loss": 0.03350648283958435, "epoch": 4.16, "learning_rate": 1.775849790327291e-05, "loss": 0.0312, "step": 4378, "task_loss": 0.010053610429167747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889204387990816, "compression_loss": 0.0, "distillation_loss": 0.11184386909008026, "epoch": 4.16, "learning_rate": 1.7748300110413737e-05, "loss": 0.105, "step": 4379, "task_loss": 0.04385565221309662 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.788928651964945, "compression_loss": 0.0, "distillation_loss": 0.022888878360390663, "epoch": 4.16, "learning_rate": 1.7738103634860776e-05, "loss": 0.0224, "step": 4380, "task_loss": 0.017647787928581238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889368610709144, "compression_loss": 0.0, "distillation_loss": 0.11289818584918976, "epoch": 4.16, "learning_rate": 1.7727908478466264e-05, "loss": 0.1062, "step": 4381, "task_loss": 0.04638146981596947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889450661179934, "compression_loss": 0.0, "distillation_loss": 0.1492418795824051, "epoch": 4.16, "learning_rate": 1.771771464308219e-05, "loss": 0.1528, "step": 4382, "task_loss": 0.18500253558158875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889532671071856, "compression_loss": 0.0, "distillation_loss": 0.13683563470840454, "epoch": 4.16, "learning_rate": 1.770752213056033e-05, "loss": 0.137, "step": 4383, "task_loss": 0.13819673657417297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889614640394949, "compression_loss": 0.0, "distillation_loss": 0.02154296077787876, "epoch": 4.16, "learning_rate": 1.7697330942752193e-05, "loss": 0.0206, "step": 4384, "task_loss": 0.012236261740326881 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889696569159248, "compression_loss": 0.0, "distillation_loss": 0.041471950709819794, "epoch": 4.16, "learning_rate": 1.768714108150907e-05, "loss": 0.044, "step": 4385, "task_loss": 0.06637803465127945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889778457374791, "compression_loss": 0.0, "distillation_loss": 0.1052827313542366, "epoch": 4.17, "learning_rate": 1.767695254868198e-05, "loss": 0.1006, "step": 4386, "task_loss": 0.057975780218839645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889860305051614, "compression_loss": 0.0, "distillation_loss": 0.0629846528172493, "epoch": 4.17, "learning_rate": 1.766676534612173e-05, "loss": 0.0609, "step": 4387, "task_loss": 0.042560774832963943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7889942112199754, "compression_loss": 0.0, "distillation_loss": 0.039940863847732544, "epoch": 4.17, "learning_rate": 1.7656579475678876e-05, "loss": 0.0388, "step": 4388, "task_loss": 0.028355613350868225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890023878829248, "compression_loss": 0.0, "distillation_loss": 0.03699345886707306, "epoch": 4.17, "learning_rate": 1.764639493920372e-05, "loss": 0.041, "step": 4389, "task_loss": 0.07748576998710632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890105604950132, "compression_loss": 0.0, "distillation_loss": 0.03237525001168251, "epoch": 4.17, "learning_rate": 1.763621173854635e-05, "loss": 0.0299, "step": 4390, "task_loss": 0.007551593706011772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890187290572445, "compression_loss": 0.0, "distillation_loss": 0.08855956047773361, "epoch": 4.17, "learning_rate": 1.762602987555656e-05, "loss": 0.0878, "step": 4391, "task_loss": 0.08059325814247131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890268935706221, "compression_loss": 0.0, "distillation_loss": 0.04030374437570572, "epoch": 4.17, "learning_rate": 1.7615849352083975e-05, "loss": 0.0561, "step": 4392, "task_loss": 0.1978634148836136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890350540361498, "compression_loss": 0.0, "distillation_loss": 0.021272670477628708, "epoch": 4.17, "learning_rate": 1.760567016997791e-05, "loss": 0.0197, "step": 4393, "task_loss": 0.005058445036411285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890432104548314, "compression_loss": 0.0, "distillation_loss": 0.13198691606521606, "epoch": 4.17, "learning_rate": 1.760567016997791e-05, "loss": 0.128, "step": 4394, "task_loss": 0.09192854911088943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890513628276704, "compression_loss": 0.0, "distillation_loss": 0.14407461881637573, "epoch": 4.17, "learning_rate": 1.7595492331087472e-05, "loss": 0.1359, "step": 4395, "task_loss": 0.06224524974822998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890595111556706, "compression_loss": 0.0, "distillation_loss": 0.028794020414352417, "epoch": 4.17, "learning_rate": 1.7585315837261518e-05, "loss": 0.0571, "step": 4396, "task_loss": 0.31160321831703186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890676554398357, "compression_loss": 0.0, "distillation_loss": 0.1819879114627838, "epoch": 4.18, "learning_rate": 1.7575140690348647e-05, "loss": 0.1749, "step": 4397, "task_loss": 0.11067160218954086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890757956811694, "compression_loss": 0.0, "distillation_loss": 0.0241214781999588, "epoch": 4.18, "learning_rate": 1.756496689219723e-05, "loss": 0.0231, "step": 4398, "task_loss": 0.013442834839224815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890839318806752, "compression_loss": 0.0, "distillation_loss": 0.0341639518737793, "epoch": 4.18, "learning_rate": 1.755479444465538e-05, "loss": 0.0382, "step": 4399, "task_loss": 0.07429169118404388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7890920640393568, "compression_loss": 0.0, "distillation_loss": 0.06670385599136353, "epoch": 4.18, "learning_rate": 1.7544623349570973e-05, "loss": 0.071, "step": 4400, "task_loss": 0.11016245931386948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891001921582181, "compression_loss": 0.0, "distillation_loss": 0.11859071999788284, "epoch": 4.18, "learning_rate": 1.7534453608791644e-05, "loss": 0.1156, "step": 4401, "task_loss": 0.08839291334152222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891083162382626, "compression_loss": 0.0, "distillation_loss": 0.027910087257623672, "epoch": 4.18, "learning_rate": 1.7524285224164772e-05, "loss": 0.0256, "step": 4402, "task_loss": 0.005291949957609177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891164362804941, "compression_loss": 0.0, "distillation_loss": 0.05852048099040985, "epoch": 4.18, "learning_rate": 1.7514118197537497e-05, "loss": 0.0757, "step": 4403, "task_loss": 0.2301456481218338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891245522859162, "compression_loss": 0.0, "distillation_loss": 0.0825999528169632, "epoch": 4.18, "learning_rate": 1.75039525307567e-05, "loss": 0.091, "step": 4404, "task_loss": 0.1666475236415863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891326642555326, "compression_loss": 0.0, "distillation_loss": 0.02013680338859558, "epoch": 4.18, "learning_rate": 1.7493788225669027e-05, "loss": 0.0268, "step": 4405, "task_loss": 0.08626913279294968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789140772190347, "compression_loss": 0.0, "distillation_loss": 0.024234000593423843, "epoch": 4.18, "learning_rate": 1.7483625284120876e-05, "loss": 0.031, "step": 4406, "task_loss": 0.09184201061725616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891488760913631, "compression_loss": 0.0, "distillation_loss": 0.11344337463378906, "epoch": 4.19, "learning_rate": 1.7473463707958388e-05, "loss": 0.1076, "step": 4407, "task_loss": 0.055413682013750076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891569759595845, "compression_loss": 0.0, "distillation_loss": 0.021596118807792664, "epoch": 4.19, "learning_rate": 1.7463303499027466e-05, "loss": 0.0199, "step": 4408, "task_loss": 0.004350002855062485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891650717960149, "compression_loss": 0.0, "distillation_loss": 0.03837398812174797, "epoch": 4.19, "learning_rate": 1.745314465917375e-05, "loss": 0.0356, "step": 4409, "task_loss": 0.010302269831299782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789173163601658, "compression_loss": 0.0, "distillation_loss": 0.23183999955654144, "epoch": 4.19, "learning_rate": 1.7442987190242668e-05, "loss": 0.2387, "step": 4410, "task_loss": 0.3002890646457672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891812513775176, "compression_loss": 0.0, "distillation_loss": 0.1880335658788681, "epoch": 4.19, "learning_rate": 1.7432831094079355e-05, "loss": 0.1926, "step": 4411, "task_loss": 0.2336317002773285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891893351245972, "compression_loss": 0.0, "distillation_loss": 0.13378804922103882, "epoch": 4.19, "learning_rate": 1.7422676372528718e-05, "loss": 0.13, "step": 4412, "task_loss": 0.09611691534519196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7891974148439006, "compression_loss": 0.0, "distillation_loss": 0.021651534363627434, "epoch": 4.19, "learning_rate": 1.7412523027435407e-05, "loss": 0.0242, "step": 4413, "task_loss": 0.04686147719621658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892054905364314, "compression_loss": 0.0, "distillation_loss": 0.10245324671268463, "epoch": 4.19, "learning_rate": 1.740237106064383e-05, "loss": 0.1, "step": 4414, "task_loss": 0.07831554114818573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892135622031934, "compression_loss": 0.0, "distillation_loss": 0.016463253647089005, "epoch": 4.19, "learning_rate": 1.7392220473998147e-05, "loss": 0.0154, "step": 4415, "task_loss": 0.005847932770848274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892216298451902, "compression_loss": 0.0, "distillation_loss": 0.050443872809410095, "epoch": 4.19, "learning_rate": 1.738207126934225e-05, "loss": 0.0581, "step": 4416, "task_loss": 0.12747488915920258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892296934634254, "compression_loss": 0.0, "distillation_loss": 0.11745072156190872, "epoch": 4.19, "learning_rate": 1.737192344851979e-05, "loss": 0.112, "step": 4417, "task_loss": 0.06290709227323532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892377530589028, "compression_loss": 0.0, "distillation_loss": 0.041940174996852875, "epoch": 4.2, "learning_rate": 1.7361777013374173e-05, "loss": 0.0392, "step": 4418, "task_loss": 0.01443542167544365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892458086326261, "compression_loss": 0.0, "distillation_loss": 0.18550318479537964, "epoch": 4.2, "learning_rate": 1.7351631965748555e-05, "loss": 0.1769, "step": 4419, "task_loss": 0.09899017959833145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892538601855988, "compression_loss": 0.0, "distillation_loss": 0.02498285286128521, "epoch": 4.2, "learning_rate": 1.734148830748582e-05, "loss": 0.023, "step": 4420, "task_loss": 0.0047489944845438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892619077188249, "compression_loss": 0.0, "distillation_loss": 0.02473044954240322, "epoch": 4.2, "learning_rate": 1.733134604042862e-05, "loss": 0.0284, "step": 4421, "task_loss": 0.06133115291595459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892699512333078, "compression_loss": 0.0, "distillation_loss": 0.07032322883605957, "epoch": 4.2, "learning_rate": 1.7321205166419348e-05, "loss": 0.0686, "step": 4422, "task_loss": 0.052621208131313324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892779907300513, "compression_loss": 0.0, "distillation_loss": 0.06657904386520386, "epoch": 4.2, "learning_rate": 1.7311065687300133e-05, "loss": 0.0622, "step": 4423, "task_loss": 0.022893797606229782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789286026210059, "compression_loss": 0.0, "distillation_loss": 0.06309516727924347, "epoch": 4.2, "learning_rate": 1.730092760491287e-05, "loss": 0.0599, "step": 4424, "task_loss": 0.031243909150362015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7892940576743348, "compression_loss": 0.0, "distillation_loss": 0.07031765580177307, "epoch": 4.2, "learning_rate": 1.729079092109919e-05, "loss": 0.0728, "step": 4425, "task_loss": 0.09468785673379898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893020851238821, "compression_loss": 0.0, "distillation_loss": 0.2251608967781067, "epoch": 4.2, "learning_rate": 1.7280655637700456e-05, "loss": 0.222, "step": 4426, "task_loss": 0.1932929903268814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893101085597048, "compression_loss": 0.0, "distillation_loss": 0.022628076374530792, "epoch": 4.2, "learning_rate": 1.7270521756557805e-05, "loss": 0.0233, "step": 4427, "task_loss": 0.02976515144109726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893181279828064, "compression_loss": 0.0, "distillation_loss": 0.040559072047472, "epoch": 4.21, "learning_rate": 1.7260389279512106e-05, "loss": 0.0407, "step": 4428, "task_loss": 0.041948944330215454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893261433941907, "compression_loss": 0.0, "distillation_loss": 0.02462448924779892, "epoch": 4.21, "learning_rate": 1.7250258208403974e-05, "loss": 0.0236, "step": 4429, "task_loss": 0.014600781723856926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893341547948615, "compression_loss": 0.0, "distillation_loss": 0.02943100593984127, "epoch": 4.21, "learning_rate": 1.7240128545073753e-05, "loss": 0.0367, "step": 4430, "task_loss": 0.10226895660161972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893421621858222, "compression_loss": 0.0, "distillation_loss": 0.23634879291057587, "epoch": 4.21, "learning_rate": 1.723000029136156e-05, "loss": 0.244, "step": 4431, "task_loss": 0.3131251037120819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893501655680766, "compression_loss": 0.0, "distillation_loss": 0.14374813437461853, "epoch": 4.21, "learning_rate": 1.7219873449107233e-05, "loss": 0.136, "step": 4432, "task_loss": 0.06602862477302551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893581649426284, "compression_loss": 0.0, "distillation_loss": 0.03242877870798111, "epoch": 4.21, "learning_rate": 1.7209748020150362e-05, "loss": 0.0366, "step": 4433, "task_loss": 0.07460720837116241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893661603104813, "compression_loss": 0.0, "distillation_loss": 0.028279315680265427, "epoch": 4.21, "learning_rate": 1.719962400633028e-05, "loss": 0.0334, "step": 4434, "task_loss": 0.07948870211839676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789374151672639, "compression_loss": 0.0, "distillation_loss": 0.021993128582835197, "epoch": 4.21, "learning_rate": 1.7189501409486062e-05, "loss": 0.0263, "step": 4435, "task_loss": 0.06503792107105255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893821390301051, "compression_loss": 0.0, "distillation_loss": 0.08905947208404541, "epoch": 4.21, "learning_rate": 1.717938023145654e-05, "loss": 0.103, "step": 4436, "task_loss": 0.22882232069969177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893901223838834, "compression_loss": 0.0, "distillation_loss": 0.02391093783080578, "epoch": 4.21, "learning_rate": 1.716926047408025e-05, "loss": 0.022, "step": 4437, "task_loss": 0.004962872713804245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7893981017349775, "compression_loss": 0.0, "distillation_loss": 0.1042497307062149, "epoch": 4.21, "learning_rate": 1.7159142139195514e-05, "loss": 0.1155, "step": 4438, "task_loss": 0.21651120483875275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894060770843911, "compression_loss": 0.0, "distillation_loss": 0.14275360107421875, "epoch": 4.22, "learning_rate": 1.7149025228640376e-05, "loss": 0.1396, "step": 4439, "task_loss": 0.11126483976840973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894140484331279, "compression_loss": 0.0, "distillation_loss": 0.0388154536485672, "epoch": 4.22, "learning_rate": 1.7138909744252608e-05, "loss": 0.0382, "step": 4440, "task_loss": 0.03281474858522415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894220157821915, "compression_loss": 0.0, "distillation_loss": 0.07178475707769394, "epoch": 4.22, "learning_rate": 1.712879568786975e-05, "loss": 0.0693, "step": 4441, "task_loss": 0.046773068606853485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894299791325856, "compression_loss": 0.0, "distillation_loss": 0.02249164879322052, "epoch": 4.22, "learning_rate": 1.711868306132906e-05, "loss": 0.0268, "step": 4442, "task_loss": 0.06524014472961426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789437938485314, "compression_loss": 0.0, "distillation_loss": 0.028957027941942215, "epoch": 4.22, "learning_rate": 1.7108571866467547e-05, "loss": 0.0456, "step": 4443, "task_loss": 0.19559374451637268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894458938413803, "compression_loss": 0.0, "distillation_loss": 0.13259674608707428, "epoch": 4.22, "learning_rate": 1.709846210512196e-05, "loss": 0.1353, "step": 4444, "task_loss": 0.15936444699764252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894538452017882, "compression_loss": 0.0, "distillation_loss": 0.18647846579551697, "epoch": 4.22, "learning_rate": 1.7088353779128784e-05, "loss": 0.1902, "step": 4445, "task_loss": 0.22320255637168884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894617925675413, "compression_loss": 0.0, "distillation_loss": 0.12089197337627411, "epoch": 4.22, "learning_rate": 1.7078246890324257e-05, "loss": 0.1152, "step": 4446, "task_loss": 0.06432140618562698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894697359396434, "compression_loss": 0.0, "distillation_loss": 0.016367292031645775, "epoch": 4.22, "learning_rate": 1.706814144054433e-05, "loss": 0.0152, "step": 4447, "task_loss": 0.004441702738404274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894776753190981, "compression_loss": 0.0, "distillation_loss": 0.02612762153148651, "epoch": 4.22, "learning_rate": 1.705803743162471e-05, "loss": 0.0238, "step": 4448, "task_loss": 0.0026707202196121216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894856107069091, "compression_loss": 0.0, "distillation_loss": 0.046527355909347534, "epoch": 4.23, "learning_rate": 1.704793486540084e-05, "loss": 0.0495, "step": 4449, "task_loss": 0.07587733864784241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7894935421040802, "compression_loss": 0.0, "distillation_loss": 0.016864405944943428, "epoch": 4.23, "learning_rate": 1.7037833743707892e-05, "loss": 0.0237, "step": 4450, "task_loss": 0.08481664955615997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895014695116149, "compression_loss": 0.0, "distillation_loss": 0.026964813470840454, "epoch": 4.23, "learning_rate": 1.7027734068380803e-05, "loss": 0.0318, "step": 4451, "task_loss": 0.07501386851072311 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789509392930517, "compression_loss": 0.0, "distillation_loss": 0.0899241715669632, "epoch": 4.23, "learning_rate": 1.7017635841254194e-05, "loss": 0.0956, "step": 4452, "task_loss": 0.14717119932174683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895173123617901, "compression_loss": 0.0, "distillation_loss": 0.07356803119182587, "epoch": 4.23, "learning_rate": 1.7007539064162498e-05, "loss": 0.0707, "step": 4453, "task_loss": 0.04454321041703224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789525227806438, "compression_loss": 0.0, "distillation_loss": 0.09831836819648743, "epoch": 4.23, "learning_rate": 1.6997443738939815e-05, "loss": 0.0933, "step": 4454, "task_loss": 0.04812869057059288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895331392654642, "compression_loss": 0.0, "distillation_loss": 0.036545686423778534, "epoch": 4.23, "learning_rate": 1.6987349867420024e-05, "loss": 0.0339, "step": 4455, "task_loss": 0.010073903948068619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895410467398726, "compression_loss": 0.0, "distillation_loss": 0.15401822328567505, "epoch": 4.23, "learning_rate": 1.6977257451436712e-05, "loss": 0.1472, "step": 4456, "task_loss": 0.08590954542160034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895489502306667, "compression_loss": 0.0, "distillation_loss": 0.14784236252307892, "epoch": 4.23, "learning_rate": 1.6967166492823226e-05, "loss": 0.1489, "step": 4457, "task_loss": 0.15834221243858337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895568497388503, "compression_loss": 0.0, "distillation_loss": 0.09763270616531372, "epoch": 4.23, "learning_rate": 1.6957076993412636e-05, "loss": 0.0944, "step": 4458, "task_loss": 0.06483699381351471 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789564745265427, "compression_loss": 0.0, "distillation_loss": 0.03840739279985428, "epoch": 4.23, "learning_rate": 1.694698895503774e-05, "loss": 0.0356, "step": 4459, "task_loss": 0.010582242161035538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895726368114006, "compression_loss": 0.0, "distillation_loss": 0.04345633089542389, "epoch": 4.24, "learning_rate": 1.6936902379531082e-05, "loss": 0.0488, "step": 4460, "task_loss": 0.09703336656093597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895805243777746, "compression_loss": 0.0, "distillation_loss": 0.0363914854824543, "epoch": 4.24, "learning_rate": 1.6926817268724938e-05, "loss": 0.0399, "step": 4461, "task_loss": 0.07188586890697479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895884079655529, "compression_loss": 0.0, "distillation_loss": 0.14720529317855835, "epoch": 4.24, "learning_rate": 1.6916733624451324e-05, "loss": 0.1541, "step": 4462, "task_loss": 0.2162376046180725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7895962875757389, "compression_loss": 0.0, "distillation_loss": 0.07019872963428497, "epoch": 4.24, "learning_rate": 1.690665144854198e-05, "loss": 0.0753, "step": 4463, "task_loss": 0.12100522965192795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896041632093366, "compression_loss": 0.0, "distillation_loss": 0.03042607009410858, "epoch": 4.24, "learning_rate": 1.6896570742828367e-05, "loss": 0.0286, "step": 4464, "task_loss": 0.012003440409898758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896120348673494, "compression_loss": 0.0, "distillation_loss": 0.10173506289720535, "epoch": 4.24, "learning_rate": 1.6886491509141717e-05, "loss": 0.1217, "step": 4465, "task_loss": 0.3017996847629547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896199025507812, "compression_loss": 0.0, "distillation_loss": 0.04094619303941727, "epoch": 4.24, "learning_rate": 1.6876413749312954e-05, "loss": 0.0476, "step": 4466, "task_loss": 0.10766998678445816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896277662606356, "compression_loss": 0.0, "distillation_loss": 0.019710058346390724, "epoch": 4.24, "learning_rate": 1.6866337465172754e-05, "loss": 0.0182, "step": 4467, "task_loss": 0.004470134153962135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896356259979161, "compression_loss": 0.0, "distillation_loss": 0.04553668200969696, "epoch": 4.24, "learning_rate": 1.685626265855153e-05, "loss": 0.0429, "step": 4468, "task_loss": 0.01903415471315384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896434817636268, "compression_loss": 0.0, "distillation_loss": 0.03563941270112991, "epoch": 4.24, "learning_rate": 1.6846189331279415e-05, "loss": 0.0426, "step": 4469, "task_loss": 0.10529813170433044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789651333558771, "compression_loss": 0.0, "distillation_loss": 0.09041808545589447, "epoch": 4.25, "learning_rate": 1.683611748518627e-05, "loss": 0.0915, "step": 4470, "task_loss": 0.10150431841611862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896591813843525, "compression_loss": 0.0, "distillation_loss": 0.031974878162145615, "epoch": 4.25, "learning_rate": 1.6826047122101703e-05, "loss": 0.0362, "step": 4471, "task_loss": 0.07398272305727005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896670252413751, "compression_loss": 0.0, "distillation_loss": 0.03553183376789093, "epoch": 4.25, "learning_rate": 1.6815978243855052e-05, "loss": 0.0332, "step": 4472, "task_loss": 0.012660248205065727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896748651308424, "compression_loss": 0.0, "distillation_loss": 0.1044113039970398, "epoch": 4.25, "learning_rate": 1.6805910852275358e-05, "loss": 0.1011, "step": 4473, "task_loss": 0.07151272147893906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789682701053758, "compression_loss": 0.0, "distillation_loss": 0.1547268033027649, "epoch": 4.25, "learning_rate": 1.6795844949191426e-05, "loss": 0.1734, "step": 4474, "task_loss": 0.34136468172073364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896905330111257, "compression_loss": 0.0, "distillation_loss": 0.07073305547237396, "epoch": 4.25, "learning_rate": 1.6785780536431772e-05, "loss": 0.0803, "step": 4475, "task_loss": 0.16646496951580048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7896983610039491, "compression_loss": 0.0, "distillation_loss": 0.0397295281291008, "epoch": 4.25, "learning_rate": 1.677571761582464e-05, "loss": 0.0411, "step": 4476, "task_loss": 0.05347587913274765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789706185033232, "compression_loss": 0.0, "distillation_loss": 0.04183419793844223, "epoch": 4.25, "learning_rate": 1.6765656189198013e-05, "loss": 0.0536, "step": 4477, "task_loss": 0.1599850058555603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897140050999778, "compression_loss": 0.0, "distillation_loss": 0.10868053883314133, "epoch": 4.25, "learning_rate": 1.675559625837959e-05, "loss": 0.1083, "step": 4478, "task_loss": 0.10476875305175781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897218212051905, "compression_loss": 0.0, "distillation_loss": 0.15036195516586304, "epoch": 4.25, "learning_rate": 1.6745537825196823e-05, "loss": 0.144, "step": 4479, "task_loss": 0.08634737133979797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897296333498737, "compression_loss": 0.0, "distillation_loss": 0.12207113206386566, "epoch": 4.25, "learning_rate": 1.6735480891476855e-05, "loss": 0.1236, "step": 4480, "task_loss": 0.13720379769802094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789737441535031, "compression_loss": 0.0, "distillation_loss": 0.030313938856124878, "epoch": 4.26, "learning_rate": 1.672542545904659e-05, "loss": 0.0284, "step": 4481, "task_loss": 0.010765250772237778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897452457616662, "compression_loss": 0.0, "distillation_loss": 0.08106144517660141, "epoch": 4.26, "learning_rate": 1.6715371529732643e-05, "loss": 0.1023, "step": 4482, "task_loss": 0.2930178940296173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897530460307828, "compression_loss": 0.0, "distillation_loss": 0.21419498324394226, "epoch": 4.26, "learning_rate": 1.6705319105361357e-05, "loss": 0.2036, "step": 4483, "task_loss": 0.10846757143735886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897608423433846, "compression_loss": 0.0, "distillation_loss": 0.09862993657588959, "epoch": 4.26, "learning_rate": 1.6695268187758797e-05, "loss": 0.1024, "step": 4484, "task_loss": 0.1367725133895874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897686347004753, "compression_loss": 0.0, "distillation_loss": 0.05723030865192413, "epoch": 4.26, "learning_rate": 1.6685218778750775e-05, "loss": 0.0594, "step": 4485, "task_loss": 0.07874364405870438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897764231030585, "compression_loss": 0.0, "distillation_loss": 0.017719998955726624, "epoch": 4.26, "learning_rate": 1.66751708801628e-05, "loss": 0.0165, "step": 4486, "task_loss": 0.005893906578421593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789784207552138, "compression_loss": 0.0, "distillation_loss": 0.06374501436948776, "epoch": 4.26, "learning_rate": 1.6665124493820123e-05, "loss": 0.0772, "step": 4487, "task_loss": 0.19814634323120117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897919880487174, "compression_loss": 0.0, "distillation_loss": 0.05392298847436905, "epoch": 4.26, "learning_rate": 1.6655079621547727e-05, "loss": 0.0499, "step": 4488, "task_loss": 0.013967299833893776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7897997645938004, "compression_loss": 0.0, "distillation_loss": 0.05942397564649582, "epoch": 4.26, "learning_rate": 1.6645036265170314e-05, "loss": 0.0579, "step": 4489, "task_loss": 0.04388638213276863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898075371883907, "compression_loss": 0.0, "distillation_loss": 0.035995546728372574, "epoch": 4.26, "learning_rate": 1.6634994426512296e-05, "loss": 0.0414, "step": 4490, "task_loss": 0.09044382721185684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898153058334919, "compression_loss": 0.0, "distillation_loss": 0.07521656155586243, "epoch": 4.26, "learning_rate": 1.662495410739783e-05, "loss": 0.0794, "step": 4491, "task_loss": 0.11709287762641907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898230705301078, "compression_loss": 0.0, "distillation_loss": 0.04527838155627251, "epoch": 4.27, "learning_rate": 1.661491530965078e-05, "loss": 0.0513, "step": 4492, "task_loss": 0.10566423833370209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789830831279242, "compression_loss": 0.0, "distillation_loss": 0.017616283148527145, "epoch": 4.27, "learning_rate": 1.660487803509475e-05, "loss": 0.0164, "step": 4493, "task_loss": 0.0058811865746974945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898385880818982, "compression_loss": 0.0, "distillation_loss": 0.04209098592400551, "epoch": 4.27, "learning_rate": 1.6594842285553062e-05, "loss": 0.0466, "step": 4494, "task_loss": 0.08670932054519653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898463409390801, "compression_loss": 0.0, "distillation_loss": 0.022504784166812897, "epoch": 4.27, "learning_rate": 1.6584808062848743e-05, "loss": 0.0301, "step": 4495, "task_loss": 0.09832281619310379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898540898517915, "compression_loss": 0.0, "distillation_loss": 0.09364865720272064, "epoch": 4.27, "learning_rate": 1.6574775368804567e-05, "loss": 0.0985, "step": 4496, "task_loss": 0.14257903397083282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898618348210358, "compression_loss": 0.0, "distillation_loss": 0.03564376011490822, "epoch": 4.27, "learning_rate": 1.656474420524302e-05, "loss": 0.0466, "step": 4497, "task_loss": 0.14488337934017181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898695758478169, "compression_loss": 0.0, "distillation_loss": 0.0824960246682167, "epoch": 4.27, "learning_rate": 1.6554714573986324e-05, "loss": 0.0832, "step": 4498, "task_loss": 0.08933614194393158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898773129331385, "compression_loss": 0.0, "distillation_loss": 0.14729326963424683, "epoch": 4.27, "learning_rate": 1.654468647685639e-05, "loss": 0.1449, "step": 4499, "task_loss": 0.12342742830514908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.789885046078004, "compression_loss": 0.0, "distillation_loss": 0.02807687409222126, "epoch": 4.27, "learning_rate": 1.6534659915674882e-05, "loss": 0.0259, "step": 4500, "task_loss": 0.006013935431838036 }, { "epoch": 4.27, "eval_accuracy": 0.8979357798165137, "eval_loss": 0.482150673866272, "eval_runtime": 18.0094, "eval_samples_per_second": 48.419, "eval_steps_per_second": 6.052, "step": 4500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7898927752834174, "compression_loss": 0.0, "distillation_loss": 0.05030520260334015, "epoch": 4.27, "learning_rate": 1.6524634892263176e-05, "loss": 0.049, "step": 4501, "task_loss": 0.0376378558576107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899005005503823, "compression_loss": 0.0, "distillation_loss": 0.10964888334274292, "epoch": 4.28, "learning_rate": 1.651461140844235e-05, "loss": 0.1056, "step": 4502, "task_loss": 0.0688520222902298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899082218799023, "compression_loss": 0.0, "distillation_loss": 0.19415023922920227, "epoch": 4.28, "learning_rate": 1.6504589466033226e-05, "loss": 0.1881, "step": 4503, "task_loss": 0.133355051279068 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899159392729812, "compression_loss": 0.0, "distillation_loss": 0.021605759859085083, "epoch": 4.28, "learning_rate": 1.6494569066856343e-05, "loss": 0.0294, "step": 4504, "task_loss": 0.09938529133796692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899236527306225, "compression_loss": 0.0, "distillation_loss": 0.025142306461930275, "epoch": 4.28, "learning_rate": 1.6484550212731953e-05, "loss": 0.0293, "step": 4505, "task_loss": 0.06672917306423187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899313622538301, "compression_loss": 0.0, "distillation_loss": 0.07510918378829956, "epoch": 4.28, "learning_rate": 1.6474532905480027e-05, "loss": 0.0898, "step": 4506, "task_loss": 0.22238320112228394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899390678436075, "compression_loss": 0.0, "distillation_loss": 0.06910199671983719, "epoch": 4.28, "learning_rate": 1.6464517146920255e-05, "loss": 0.0727, "step": 4507, "task_loss": 0.10474320501089096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899467695009585, "compression_loss": 0.0, "distillation_loss": 0.023270685225725174, "epoch": 4.28, "learning_rate": 1.645450293887206e-05, "loss": 0.0214, "step": 4508, "task_loss": 0.004897216334939003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899544672268868, "compression_loss": 0.0, "distillation_loss": 0.01756512001156807, "epoch": 4.28, "learning_rate": 1.6444490283154557e-05, "loss": 0.0164, "step": 4509, "task_loss": 0.006135221570730209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899621610223959, "compression_loss": 0.0, "distillation_loss": 0.05131693556904793, "epoch": 4.28, "learning_rate": 1.6434479181586594e-05, "loss": 0.067, "step": 4510, "task_loss": 0.20782402157783508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899698508884897, "compression_loss": 0.0, "distillation_loss": 0.024594193324446678, "epoch": 4.28, "learning_rate": 1.6424469635986744e-05, "loss": 0.0271, "step": 4511, "task_loss": 0.04964712634682655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899775368261717, "compression_loss": 0.0, "distillation_loss": 0.0458107516169548, "epoch": 4.28, "learning_rate": 1.6414461648173284e-05, "loss": 0.0431, "step": 4512, "task_loss": 0.019116627052426338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899852188364457, "compression_loss": 0.0, "distillation_loss": 0.028601201251149178, "epoch": 4.29, "learning_rate": 1.6404455219964203e-05, "loss": 0.0261, "step": 4513, "task_loss": 0.0036617927253246307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7899928969203154, "compression_loss": 0.0, "distillation_loss": 0.07332905381917953, "epoch": 4.29, "learning_rate": 1.6394450353177242e-05, "loss": 0.0782, "step": 4514, "task_loss": 0.12209247052669525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900005710787844, "compression_loss": 0.0, "distillation_loss": 0.051219768822193146, "epoch": 4.29, "learning_rate": 1.6384447049629816e-05, "loss": 0.0545, "step": 4515, "task_loss": 0.08445730060338974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900082413128564, "compression_loss": 0.0, "distillation_loss": 0.12552213668823242, "epoch": 4.29, "learning_rate": 1.6374445311139074e-05, "loss": 0.1197, "step": 4516, "task_loss": 0.06750532984733582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900159076235351, "compression_loss": 0.0, "distillation_loss": 0.231977179646492, "epoch": 4.29, "learning_rate": 1.6364445139521883e-05, "loss": 0.2244, "step": 4517, "task_loss": 0.15585389733314514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900235700118242, "compression_loss": 0.0, "distillation_loss": 0.027736982330679893, "epoch": 4.29, "learning_rate": 1.635444653659483e-05, "loss": 0.0257, "step": 4518, "task_loss": 0.007768923416733742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900312284787274, "compression_loss": 0.0, "distillation_loss": 0.050665318965911865, "epoch": 4.29, "learning_rate": 1.6344449504174193e-05, "loss": 0.0625, "step": 4519, "task_loss": 0.16943272948265076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900388830252483, "compression_loss": 0.0, "distillation_loss": 0.03559410944581032, "epoch": 4.29, "learning_rate": 1.6334454044075988e-05, "loss": 0.0405, "step": 4520, "task_loss": 0.08508558571338654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900465336523906, "compression_loss": 0.0, "distillation_loss": 0.05828773230314255, "epoch": 4.29, "learning_rate": 1.6324460158115942e-05, "loss": 0.0552, "step": 4521, "task_loss": 0.027413969859480858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900541803611582, "compression_loss": 0.0, "distillation_loss": 0.2908591628074646, "epoch": 4.29, "learning_rate": 1.6314467848109483e-05, "loss": 0.2765, "step": 4522, "task_loss": 0.14718219637870789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900618231525544, "compression_loss": 0.0, "distillation_loss": 0.013387423008680344, "epoch": 4.3, "learning_rate": 1.6304477115871776e-05, "loss": 0.0194, "step": 4523, "task_loss": 0.07351858913898468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900694620275831, "compression_loss": 0.0, "distillation_loss": 0.13720417022705078, "epoch": 4.3, "learning_rate": 1.6294487963217677e-05, "loss": 0.129, "step": 4524, "task_loss": 0.05525549128651619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900770969872479, "compression_loss": 0.0, "distillation_loss": 0.024422302842140198, "epoch": 4.3, "learning_rate": 1.6284500391961772e-05, "loss": 0.0328, "step": 4525, "task_loss": 0.10812153667211533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900847280325526, "compression_loss": 0.0, "distillation_loss": 0.13294796645641327, "epoch": 4.3, "learning_rate": 1.627451440391834e-05, "loss": 0.1583, "step": 4526, "task_loss": 0.38683199882507324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900923551645008, "compression_loss": 0.0, "distillation_loss": 0.025384480133652687, "epoch": 4.3, "learning_rate": 1.626453000090139e-05, "loss": 0.0332, "step": 4527, "task_loss": 0.10327724367380142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7900999783840963, "compression_loss": 0.0, "distillation_loss": 0.1557433307170868, "epoch": 4.3, "learning_rate": 1.625454718472464e-05, "loss": 0.1661, "step": 4528, "task_loss": 0.2594253718852997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901075976923425, "compression_loss": 0.0, "distillation_loss": 0.07799336314201355, "epoch": 4.3, "learning_rate": 1.6244565957201506e-05, "loss": 0.0748, "step": 4529, "task_loss": 0.04625723510980606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901152130902435, "compression_loss": 0.0, "distillation_loss": 0.03408731520175934, "epoch": 4.3, "learning_rate": 1.6234586320145125e-05, "loss": 0.0314, "step": 4530, "task_loss": 0.007508426904678345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901228245788026, "compression_loss": 0.0, "distillation_loss": 0.023436367511749268, "epoch": 4.3, "learning_rate": 1.6224608275368364e-05, "loss": 0.0238, "step": 4531, "task_loss": 0.026913581416010857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901304321590236, "compression_loss": 0.0, "distillation_loss": 0.02828259766101837, "epoch": 4.3, "learning_rate": 1.6214631824683773e-05, "loss": 0.026, "step": 4532, "task_loss": 0.005199538543820381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901380358319102, "compression_loss": 0.0, "distillation_loss": 0.02083125337958336, "epoch": 4.3, "learning_rate": 1.6204656969903618e-05, "loss": 0.0344, "step": 4533, "task_loss": 0.15678700804710388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901456355984662, "compression_loss": 0.0, "distillation_loss": 0.023461364209651947, "epoch": 4.31, "learning_rate": 1.6194683712839885e-05, "loss": 0.0259, "step": 4534, "task_loss": 0.048007965087890625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901532314596952, "compression_loss": 0.0, "distillation_loss": 0.03079340234398842, "epoch": 4.31, "learning_rate": 1.618471205530427e-05, "loss": 0.0345, "step": 4535, "task_loss": 0.0679367259144783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901608234166008, "compression_loss": 0.0, "distillation_loss": 0.034932032227516174, "epoch": 4.31, "learning_rate": 1.6174741999108157e-05, "loss": 0.0328, "step": 4536, "task_loss": 0.013840943574905396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901684114701867, "compression_loss": 0.0, "distillation_loss": 0.09038272500038147, "epoch": 4.31, "learning_rate": 1.6164773546062667e-05, "loss": 0.0854, "step": 4537, "task_loss": 0.04105079919099808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901759956214566, "compression_loss": 0.0, "distillation_loss": 0.056141406297683716, "epoch": 4.31, "learning_rate": 1.6154806697978608e-05, "loss": 0.0534, "step": 4538, "task_loss": 0.028462648391723633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901835758714143, "compression_loss": 0.0, "distillation_loss": 0.02680385112762451, "epoch": 4.31, "learning_rate": 1.614484145666651e-05, "loss": 0.0247, "step": 4539, "task_loss": 0.005452977493405342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901911522210633, "compression_loss": 0.0, "distillation_loss": 0.036854278296232224, "epoch": 4.31, "learning_rate": 1.613487782393661e-05, "loss": 0.0338, "step": 4540, "task_loss": 0.006693465635180473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7901987246714074, "compression_loss": 0.0, "distillation_loss": 0.019134201109409332, "epoch": 4.31, "learning_rate": 1.6124915801598852e-05, "loss": 0.0225, "step": 4541, "task_loss": 0.05329040437936783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902062932234503, "compression_loss": 0.0, "distillation_loss": 0.2091977298259735, "epoch": 4.31, "learning_rate": 1.6114955391462878e-05, "loss": 0.2147, "step": 4542, "task_loss": 0.2644974887371063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902138578781955, "compression_loss": 0.0, "distillation_loss": 0.07729081809520721, "epoch": 4.31, "learning_rate": 1.6104996595338047e-05, "loss": 0.0728, "step": 4543, "task_loss": 0.03260170668363571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790221418636647, "compression_loss": 0.0, "distillation_loss": 0.1044703871011734, "epoch": 4.32, "learning_rate": 1.609503941503343e-05, "loss": 0.107, "step": 4544, "task_loss": 0.12950289249420166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902289754998082, "compression_loss": 0.0, "distillation_loss": 0.03879034146666527, "epoch": 4.32, "learning_rate": 1.6085083852357786e-05, "loss": 0.0542, "step": 4545, "task_loss": 0.1928063929080963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902365284686829, "compression_loss": 0.0, "distillation_loss": 0.13962599635124207, "epoch": 4.32, "learning_rate": 1.6075129909119592e-05, "loss": 0.1528, "step": 4546, "task_loss": 0.27165764570236206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902440775442747, "compression_loss": 0.0, "distillation_loss": 0.05956469848752022, "epoch": 4.32, "learning_rate": 1.6065177587127027e-05, "loss": 0.0687, "step": 4547, "task_loss": 0.15051259100437164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902516227275874, "compression_loss": 0.0, "distillation_loss": 0.022979095578193665, "epoch": 4.32, "learning_rate": 1.6055226888188e-05, "loss": 0.0238, "step": 4548, "task_loss": 0.030920779332518578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902591640196246, "compression_loss": 0.0, "distillation_loss": 0.015515048988163471, "epoch": 4.32, "learning_rate": 1.604527781411008e-05, "loss": 0.0215, "step": 4549, "task_loss": 0.07502803206443787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79026670142139, "compression_loss": 0.0, "distillation_loss": 0.025966979563236237, "epoch": 4.32, "learning_rate": 1.6035330366700567e-05, "loss": 0.039, "step": 4550, "task_loss": 0.15588192641735077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902742349338874, "compression_loss": 0.0, "distillation_loss": 0.011777522042393684, "epoch": 4.32, "learning_rate": 1.6025384547766477e-05, "loss": 0.011, "step": 4551, "task_loss": 0.003892483189702034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902817645581203, "compression_loss": 0.0, "distillation_loss": 0.018658041954040527, "epoch": 4.32, "learning_rate": 1.6015440359114497e-05, "loss": 0.0173, "step": 4552, "task_loss": 0.004857182502746582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902892902950923, "compression_loss": 0.0, "distillation_loss": 0.052327413111925125, "epoch": 4.32, "learning_rate": 1.600549780255105e-05, "loss": 0.0637, "step": 4553, "task_loss": 0.1657162308692932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7902968121458074, "compression_loss": 0.0, "distillation_loss": 0.22867000102996826, "epoch": 4.32, "learning_rate": 1.5995556879882246e-05, "loss": 0.2293, "step": 4554, "task_loss": 0.23492863774299622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903043301112691, "compression_loss": 0.0, "distillation_loss": 0.014244308695197105, "epoch": 4.33, "learning_rate": 1.59856175929139e-05, "loss": 0.0134, "step": 4555, "task_loss": 0.006260443478822708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903118441924811, "compression_loss": 0.0, "distillation_loss": 0.06026815250515938, "epoch": 4.33, "learning_rate": 1.597567994345152e-05, "loss": 0.0547, "step": 4556, "task_loss": 0.004877146333456039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903193543904471, "compression_loss": 0.0, "distillation_loss": 0.04478321969509125, "epoch": 4.33, "learning_rate": 1.5965743933300352e-05, "loss": 0.0435, "step": 4557, "task_loss": 0.03176284581422806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903268607061708, "compression_loss": 0.0, "distillation_loss": 0.044730305671691895, "epoch": 4.33, "learning_rate": 1.595580956426531e-05, "loss": 0.0555, "step": 4558, "task_loss": 0.1524236649274826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903343631406556, "compression_loss": 0.0, "distillation_loss": 0.022832248359918594, "epoch": 4.33, "learning_rate": 1.5945876838151014e-05, "loss": 0.0285, "step": 4559, "task_loss": 0.07918474823236465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903418616949057, "compression_loss": 0.0, "distillation_loss": 0.13835708796977997, "epoch": 4.33, "learning_rate": 1.5935945756761794e-05, "loss": 0.1415, "step": 4560, "task_loss": 0.17012692987918854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903493563699244, "compression_loss": 0.0, "distillation_loss": 0.06880520284175873, "epoch": 4.33, "learning_rate": 1.592601632190169e-05, "loss": 0.0639, "step": 4561, "task_loss": 0.019452493637800217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903568471667155, "compression_loss": 0.0, "distillation_loss": 0.046133361756801605, "epoch": 4.33, "learning_rate": 1.591608853537441e-05, "loss": 0.0463, "step": 4562, "task_loss": 0.04781344532966614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903643340862827, "compression_loss": 0.0, "distillation_loss": 0.038969479501247406, "epoch": 4.33, "learning_rate": 1.5906162398983397e-05, "loss": 0.05, "step": 4563, "task_loss": 0.14975658059120178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903718171296297, "compression_loss": 0.0, "distillation_loss": 0.07891889661550522, "epoch": 4.33, "learning_rate": 1.5896237914531793e-05, "loss": 0.0827, "step": 4564, "task_loss": 0.1163320243358612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79037929629776, "compression_loss": 0.0, "distillation_loss": 0.014497898519039154, "epoch": 4.34, "learning_rate": 1.58863150838224e-05, "loss": 0.0135, "step": 4565, "task_loss": 0.004507582634687424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903867715916776, "compression_loss": 0.0, "distillation_loss": 0.07853427529335022, "epoch": 4.34, "learning_rate": 1.5876393908657766e-05, "loss": 0.0769, "step": 4566, "task_loss": 0.06254800409078598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7903942430123858, "compression_loss": 0.0, "distillation_loss": 0.04548817127943039, "epoch": 4.34, "learning_rate": 1.5866474390840125e-05, "loss": 0.0488, "step": 4567, "task_loss": 0.07903194427490234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904017105608887, "compression_loss": 0.0, "distillation_loss": 0.01196481566876173, "epoch": 4.34, "learning_rate": 1.58565565321714e-05, "loss": 0.0192, "step": 4568, "task_loss": 0.0844261422753334 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904091742381896, "compression_loss": 0.0, "distillation_loss": 0.0883462056517601, "epoch": 4.34, "learning_rate": 1.584664033445321e-05, "loss": 0.1073, "step": 4569, "task_loss": 0.27824994921684265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904166340452924, "compression_loss": 0.0, "distillation_loss": 0.06949096918106079, "epoch": 4.34, "learning_rate": 1.583672579948689e-05, "loss": 0.0762, "step": 4570, "task_loss": 0.13672670722007751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904240899832008, "compression_loss": 0.0, "distillation_loss": 0.017165692523121834, "epoch": 4.34, "learning_rate": 1.582681292907346e-05, "loss": 0.0189, "step": 4571, "task_loss": 0.03450107201933861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904315420529183, "compression_loss": 0.0, "distillation_loss": 0.028937380760908127, "epoch": 4.34, "learning_rate": 1.581690172501364e-05, "loss": 0.0334, "step": 4572, "task_loss": 0.07330554723739624 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904389902554487, "compression_loss": 0.0, "distillation_loss": 0.04542282223701477, "epoch": 4.34, "learning_rate": 1.5806992189107838e-05, "loss": 0.0445, "step": 4573, "task_loss": 0.036514170467853546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904464345917958, "compression_loss": 0.0, "distillation_loss": 0.11212094128131866, "epoch": 4.34, "learning_rate": 1.5797084323156186e-05, "loss": 0.1122, "step": 4574, "task_loss": 0.11315297335386276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790453875062963, "compression_loss": 0.0, "distillation_loss": 0.09616538882255554, "epoch": 4.34, "learning_rate": 1.5787178128958496e-05, "loss": 0.0895, "step": 4575, "task_loss": 0.029983239248394966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904613116699543, "compression_loss": 0.0, "distillation_loss": 0.03785378485918045, "epoch": 4.35, "learning_rate": 1.577727360831426e-05, "loss": 0.0441, "step": 4576, "task_loss": 0.1002790629863739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904687444137731, "compression_loss": 0.0, "distillation_loss": 0.024910183623433113, "epoch": 4.35, "learning_rate": 1.5767370763022694e-05, "loss": 0.0315, "step": 4577, "task_loss": 0.09050406515598297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904761732954233, "compression_loss": 0.0, "distillation_loss": 0.07750061899423599, "epoch": 4.35, "learning_rate": 1.5757469594882692e-05, "loss": 0.0824, "step": 4578, "task_loss": 0.12608473002910614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904835983159084, "compression_loss": 0.0, "distillation_loss": 0.03907743841409683, "epoch": 4.35, "learning_rate": 1.574757010569285e-05, "loss": 0.0363, "step": 4579, "task_loss": 0.011346584185957909 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904910194762322, "compression_loss": 0.0, "distillation_loss": 0.04754206910729408, "epoch": 4.35, "learning_rate": 1.5737672297251464e-05, "loss": 0.0501, "step": 4580, "task_loss": 0.07262822985649109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7904984367773984, "compression_loss": 0.0, "distillation_loss": 0.07429815083742142, "epoch": 4.35, "learning_rate": 1.5727776171356506e-05, "loss": 0.0701, "step": 4581, "task_loss": 0.032718729227781296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905058502204106, "compression_loss": 0.0, "distillation_loss": 0.10426107048988342, "epoch": 4.35, "learning_rate": 1.571788172980566e-05, "loss": 0.1008, "step": 4582, "task_loss": 0.06919807940721512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905132598062725, "compression_loss": 0.0, "distillation_loss": 0.21805521845817566, "epoch": 4.35, "learning_rate": 1.5707988974396304e-05, "loss": 0.2072, "step": 4583, "task_loss": 0.10992544889450073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905206655359879, "compression_loss": 0.0, "distillation_loss": 0.10667760670185089, "epoch": 4.35, "learning_rate": 1.56980979069255e-05, "loss": 0.1054, "step": 4584, "task_loss": 0.09397716075181961 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905280674105603, "compression_loss": 0.0, "distillation_loss": 0.01430472545325756, "epoch": 4.35, "learning_rate": 1.568820852919002e-05, "loss": 0.0183, "step": 4585, "task_loss": 0.05472009256482124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905354654309934, "compression_loss": 0.0, "distillation_loss": 0.037967242300510406, "epoch": 4.36, "learning_rate": 1.5678320842986295e-05, "loss": 0.0446, "step": 4586, "task_loss": 0.10416756570339203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905428595982911, "compression_loss": 0.0, "distillation_loss": 0.07452750205993652, "epoch": 4.36, "learning_rate": 1.5668434850110493e-05, "loss": 0.0828, "step": 4587, "task_loss": 0.1572713553905487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905502499134568, "compression_loss": 0.0, "distillation_loss": 0.018898198381066322, "epoch": 4.36, "learning_rate": 1.565855055235843e-05, "loss": 0.0174, "step": 4588, "task_loss": 0.004319200292229652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905576363774943, "compression_loss": 0.0, "distillation_loss": 0.017305273562669754, "epoch": 4.36, "learning_rate": 1.5648667951525653e-05, "loss": 0.0243, "step": 4589, "task_loss": 0.08709577471017838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905650189914074, "compression_loss": 0.0, "distillation_loss": 0.02825682982802391, "epoch": 4.36, "learning_rate": 1.5638787049407382e-05, "loss": 0.035, "step": 4590, "task_loss": 0.09588130563497543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905723977561996, "compression_loss": 0.0, "distillation_loss": 0.03142907842993736, "epoch": 4.36, "learning_rate": 1.5628907847798517e-05, "loss": 0.036, "step": 4591, "task_loss": 0.07705745846033096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905797726728746, "compression_loss": 0.0, "distillation_loss": 0.040951065719127655, "epoch": 4.36, "learning_rate": 1.5619030348493684e-05, "loss": 0.0562, "step": 4592, "task_loss": 0.19311881065368652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905871437424362, "compression_loss": 0.0, "distillation_loss": 0.023970093578100204, "epoch": 4.36, "learning_rate": 1.5609154553287163e-05, "loss": 0.032, "step": 4593, "task_loss": 0.10465425252914429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7905945109658881, "compression_loss": 0.0, "distillation_loss": 0.03335358202457428, "epoch": 4.36, "learning_rate": 1.5599280463972953e-05, "loss": 0.0312, "step": 4594, "task_loss": 0.011852225288748741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906018743442338, "compression_loss": 0.0, "distillation_loss": 0.10270802676677704, "epoch": 4.36, "learning_rate": 1.558940808234471e-05, "loss": 0.1207, "step": 4595, "task_loss": 0.2826935052871704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790609233878477, "compression_loss": 0.0, "distillation_loss": 0.11305034160614014, "epoch": 4.36, "learning_rate": 1.5579537410195817e-05, "loss": 0.1194, "step": 4596, "task_loss": 0.1764708012342453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906165895696217, "compression_loss": 0.0, "distillation_loss": 0.02198610082268715, "epoch": 4.37, "learning_rate": 1.5569668449319323e-05, "loss": 0.0204, "step": 4597, "task_loss": 0.005980312824249268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906239414186712, "compression_loss": 0.0, "distillation_loss": 0.03549554944038391, "epoch": 4.37, "learning_rate": 1.5559801201507968e-05, "loss": 0.0526, "step": 4598, "task_loss": 0.20651255548000336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906312894266293, "compression_loss": 0.0, "distillation_loss": 0.09966300427913666, "epoch": 4.37, "learning_rate": 1.554993566855418e-05, "loss": 0.1053, "step": 4599, "task_loss": 0.156356543302536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906386335944998, "compression_loss": 0.0, "distillation_loss": 0.0843530148267746, "epoch": 4.37, "learning_rate": 1.5540071852250106e-05, "loss": 0.0909, "step": 4600, "task_loss": 0.14952674508094788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906459739232862, "compression_loss": 0.0, "distillation_loss": 0.08812104910612106, "epoch": 4.37, "learning_rate": 1.5530209754387537e-05, "loss": 0.0843, "step": 4601, "task_loss": 0.04979713633656502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906533104139923, "compression_loss": 0.0, "distillation_loss": 0.0816584974527359, "epoch": 4.37, "learning_rate": 1.552034937675797e-05, "loss": 0.0981, "step": 4602, "task_loss": 0.24617856740951538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906606430676218, "compression_loss": 0.0, "distillation_loss": 0.022141676396131516, "epoch": 4.37, "learning_rate": 1.5510490721152592e-05, "loss": 0.0206, "step": 4603, "task_loss": 0.006470389664173126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906679718851783, "compression_loss": 0.0, "distillation_loss": 0.039304304867982864, "epoch": 4.37, "learning_rate": 1.550063378936228e-05, "loss": 0.0401, "step": 4604, "task_loss": 0.047406185418367386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906752968676656, "compression_loss": 0.0, "distillation_loss": 0.03900455683469772, "epoch": 4.37, "learning_rate": 1.549077858317759e-05, "loss": 0.0587, "step": 4605, "task_loss": 0.23572197556495667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906826180160873, "compression_loss": 0.0, "distillation_loss": 0.01960546150803566, "epoch": 4.37, "learning_rate": 1.5480925104388762e-05, "loss": 0.0222, "step": 4606, "task_loss": 0.045594025403261185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790689935331447, "compression_loss": 0.0, "distillation_loss": 0.011018482968211174, "epoch": 4.38, "learning_rate": 1.547107335478574e-05, "loss": 0.0162, "step": 4607, "task_loss": 0.06283082067966461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7906972488147486, "compression_loss": 0.0, "distillation_loss": 0.13586799800395966, "epoch": 4.38, "learning_rate": 1.5461223336158127e-05, "loss": 0.1389, "step": 4608, "task_loss": 0.1658676415681839 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907045584669955, "compression_loss": 0.0, "distillation_loss": 0.061904579401016235, "epoch": 4.38, "learning_rate": 1.5451375050295235e-05, "loss": 0.0694, "step": 4609, "task_loss": 0.1364220827817917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907118642891916, "compression_loss": 0.0, "distillation_loss": 0.06705223023891449, "epoch": 4.38, "learning_rate": 1.5441528498986053e-05, "loss": 0.0632, "step": 4610, "task_loss": 0.028953341767191887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907191662823405, "compression_loss": 0.0, "distillation_loss": 0.06535607576370239, "epoch": 4.38, "learning_rate": 1.543168368401926e-05, "loss": 0.0687, "step": 4611, "task_loss": 0.09915080666542053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907264644474459, "compression_loss": 0.0, "distillation_loss": 0.041197728365659714, "epoch": 4.38, "learning_rate": 1.5421840607183203e-05, "loss": 0.0381, "step": 4612, "task_loss": 0.009746959432959557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907337587855114, "compression_loss": 0.0, "distillation_loss": 0.009639294818043709, "epoch": 4.38, "learning_rate": 1.5411999270265924e-05, "loss": 0.0091, "step": 4613, "task_loss": 0.004122687503695488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790741049297541, "compression_loss": 0.0, "distillation_loss": 0.04923541843891144, "epoch": 4.38, "learning_rate": 1.5402159675055166e-05, "loss": 0.0499, "step": 4614, "task_loss": 0.05560116469860077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790748335984538, "compression_loss": 0.0, "distillation_loss": 0.062305059283971786, "epoch": 4.38, "learning_rate": 1.5392321823338318e-05, "loss": 0.0646, "step": 4615, "task_loss": 0.08488155901432037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907556188475061, "compression_loss": 0.0, "distillation_loss": 0.08541512489318848, "epoch": 4.38, "learning_rate": 1.5382485716902486e-05, "loss": 0.0906, "step": 4616, "task_loss": 0.13751107454299927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907628978874492, "compression_loss": 0.0, "distillation_loss": 0.01606060564517975, "epoch": 4.38, "learning_rate": 1.537265135753443e-05, "loss": 0.0216, "step": 4617, "task_loss": 0.07104544341564178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.790770173105371, "compression_loss": 0.0, "distillation_loss": 0.013350674882531166, "epoch": 4.39, "learning_rate": 1.536281874702063e-05, "loss": 0.0173, "step": 4618, "task_loss": 0.05334080010652542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907774445022749, "compression_loss": 0.0, "distillation_loss": 0.03925604373216629, "epoch": 4.39, "learning_rate": 1.535298788714722e-05, "loss": 0.0443, "step": 4619, "task_loss": 0.08980512619018555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907847120791649, "compression_loss": 0.0, "distillation_loss": 0.07377579063177109, "epoch": 4.39, "learning_rate": 1.5343158779700016e-05, "loss": 0.0715, "step": 4620, "task_loss": 0.050635963678359985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907919758370445, "compression_loss": 0.0, "distillation_loss": 0.034185461699962616, "epoch": 4.39, "learning_rate": 1.533333142646453e-05, "loss": 0.0327, "step": 4621, "task_loss": 0.019142286852002144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7907992357769174, "compression_loss": 0.0, "distillation_loss": 0.08377894014120102, "epoch": 4.39, "learning_rate": 1.5323505829225947e-05, "loss": 0.0824, "step": 4622, "task_loss": 0.07016059011220932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908064918997872, "compression_loss": 0.0, "distillation_loss": 0.13712866604328156, "epoch": 4.39, "learning_rate": 1.5313681989769136e-05, "loss": 0.1362, "step": 4623, "task_loss": 0.12798915803432465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908137442066578, "compression_loss": 0.0, "distillation_loss": 0.08357212692499161, "epoch": 4.39, "learning_rate": 1.530385990987863e-05, "loss": 0.0788, "step": 4624, "task_loss": 0.035898420959711075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908209926985328, "compression_loss": 0.0, "distillation_loss": 0.025412458926439285, "epoch": 4.39, "learning_rate": 1.529403959133867e-05, "loss": 0.0407, "step": 4625, "task_loss": 0.17801359295845032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908282373764158, "compression_loss": 0.0, "distillation_loss": 0.020880520343780518, "epoch": 4.39, "learning_rate": 1.5284221035933166e-05, "loss": 0.0194, "step": 4626, "task_loss": 0.006218817085027695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908354782413105, "compression_loss": 0.0, "distillation_loss": 0.09893855452537537, "epoch": 4.39, "learning_rate": 1.5274404245445704e-05, "loss": 0.0946, "step": 4627, "task_loss": 0.05531448870897293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908427152942207, "compression_loss": 0.0, "distillation_loss": 0.13214893639087677, "epoch": 4.4, "learning_rate": 1.5264589221659553e-05, "loss": 0.1336, "step": 4628, "task_loss": 0.14715173840522766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908499485361499, "compression_loss": 0.0, "distillation_loss": 0.11516699194908142, "epoch": 4.4, "learning_rate": 1.5254775966357653e-05, "loss": 0.1095, "step": 4629, "task_loss": 0.058957893401384354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908571779681018, "compression_loss": 0.0, "distillation_loss": 0.08969461172819138, "epoch": 4.4, "learning_rate": 1.5244964481322637e-05, "loss": 0.0989, "step": 4630, "task_loss": 0.18196004629135132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908644035910803, "compression_loss": 0.0, "distillation_loss": 0.1797245740890503, "epoch": 4.4, "learning_rate": 1.5235154768336795e-05, "loss": 0.1714, "step": 4631, "task_loss": 0.09689280390739441 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908716254060889, "compression_loss": 0.0, "distillation_loss": 0.08713481575250626, "epoch": 4.4, "learning_rate": 1.5225346829182121e-05, "loss": 0.0895, "step": 4632, "task_loss": 0.1106729805469513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908788434141313, "compression_loss": 0.0, "distillation_loss": 0.05905129015445709, "epoch": 4.4, "learning_rate": 1.5215540665640277e-05, "loss": 0.0611, "step": 4633, "task_loss": 0.07935148477554321 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908860576162112, "compression_loss": 0.0, "distillation_loss": 0.08967553079128265, "epoch": 4.4, "learning_rate": 1.5205736279492574e-05, "loss": 0.0875, "step": 4634, "task_loss": 0.0675487220287323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7908932680133324, "compression_loss": 0.0, "distillation_loss": 0.07612395286560059, "epoch": 4.4, "learning_rate": 1.5195933672520064e-05, "loss": 0.0825, "step": 4635, "task_loss": 0.13981005549430847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909004746064983, "compression_loss": 0.0, "distillation_loss": 0.18954381346702576, "epoch": 4.4, "learning_rate": 1.5186132846503412e-05, "loss": 0.1939, "step": 4636, "task_loss": 0.23316198587417603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909076773967129, "compression_loss": 0.0, "distillation_loss": 0.02852441743016243, "epoch": 4.4, "learning_rate": 1.5176333803222998e-05, "loss": 0.0353, "step": 4637, "task_loss": 0.09584514796733856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909148763849796, "compression_loss": 0.0, "distillation_loss": 0.04975371062755585, "epoch": 4.4, "learning_rate": 1.5166536544458856e-05, "loss": 0.0526, "step": 4638, "task_loss": 0.0777263194322586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909220715723022, "compression_loss": 0.0, "distillation_loss": 0.17351922392845154, "epoch": 4.41, "learning_rate": 1.515674107199071e-05, "loss": 0.1641, "step": 4639, "task_loss": 0.07936342060565948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909292629596845, "compression_loss": 0.0, "distillation_loss": 0.028591414913535118, "epoch": 4.41, "learning_rate": 1.5146947387597956e-05, "loss": 0.0428, "step": 4640, "task_loss": 0.17051829397678375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79093645054813, "compression_loss": 0.0, "distillation_loss": 0.03785558417439461, "epoch": 4.41, "learning_rate": 1.513715549305966e-05, "loss": 0.0441, "step": 4641, "task_loss": 0.10044631361961365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909436343386426, "compression_loss": 0.0, "distillation_loss": 0.09519485384225845, "epoch": 4.41, "learning_rate": 1.512736539015457e-05, "loss": 0.0916, "step": 4642, "task_loss": 0.05921884626150131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909508143322257, "compression_loss": 0.0, "distillation_loss": 0.034322589635849, "epoch": 4.41, "learning_rate": 1.5117577080661094e-05, "loss": 0.0324, "step": 4643, "task_loss": 0.014751961454749107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909579905298831, "compression_loss": 0.0, "distillation_loss": 0.01889497973024845, "epoch": 4.41, "learning_rate": 1.5107790566357347e-05, "loss": 0.0176, "step": 4644, "task_loss": 0.005802595987915993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909651629326186, "compression_loss": 0.0, "distillation_loss": 0.09566991776227951, "epoch": 4.41, "learning_rate": 1.509800584902108e-05, "loss": 0.0992, "step": 4645, "task_loss": 0.13102050125598907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909723315414358, "compression_loss": 0.0, "distillation_loss": 0.06106077879667282, "epoch": 4.41, "learning_rate": 1.508822293042974e-05, "loss": 0.0612, "step": 4646, "task_loss": 0.06258445233106613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909794963573382, "compression_loss": 0.0, "distillation_loss": 0.046314168721437454, "epoch": 4.41, "learning_rate": 1.5078441812360445e-05, "loss": 0.0432, "step": 4647, "task_loss": 0.01564324088394642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909866573813299, "compression_loss": 0.0, "distillation_loss": 0.05536476522684097, "epoch": 4.41, "learning_rate": 1.5068662496589975e-05, "loss": 0.0511, "step": 4648, "task_loss": 0.012430863454937935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7909938146144142, "compression_loss": 0.0, "distillation_loss": 0.08107525110244751, "epoch": 4.42, "learning_rate": 1.5058884984894788e-05, "loss": 0.0793, "step": 4649, "task_loss": 0.06296955794095993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910009680575949, "compression_loss": 0.0, "distillation_loss": 0.04533292353153229, "epoch": 4.42, "learning_rate": 1.5049109279051026e-05, "loss": 0.0599, "step": 4650, "task_loss": 0.1911742091178894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910081177118757, "compression_loss": 0.0, "distillation_loss": 0.05968482419848442, "epoch": 4.42, "learning_rate": 1.503933538083448e-05, "loss": 0.0574, "step": 4651, "task_loss": 0.03725713863968849 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910152635782604, "compression_loss": 0.0, "distillation_loss": 0.04093274474143982, "epoch": 4.42, "learning_rate": 1.5029563292020637e-05, "loss": 0.0498, "step": 4652, "task_loss": 0.12984903156757355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910224056577524, "compression_loss": 0.0, "distillation_loss": 0.0636286810040474, "epoch": 4.42, "learning_rate": 1.5019793014384643e-05, "loss": 0.0631, "step": 4653, "task_loss": 0.05869031697511673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910295439513556, "compression_loss": 0.0, "distillation_loss": 0.11052249372005463, "epoch": 4.42, "learning_rate": 1.5010024549701312e-05, "loss": 0.1109, "step": 4654, "task_loss": 0.11410997062921524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910366784600736, "compression_loss": 0.0, "distillation_loss": 0.06556328386068344, "epoch": 4.42, "learning_rate": 1.5000257899745134e-05, "loss": 0.0636, "step": 4655, "task_loss": 0.04608140140771866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910438091849102, "compression_loss": 0.0, "distillation_loss": 0.027359087020158768, "epoch": 4.42, "learning_rate": 1.4990493066290265e-05, "loss": 0.0253, "step": 4656, "task_loss": 0.0065147485584020615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910509361268689, "compression_loss": 0.0, "distillation_loss": 0.03795522451400757, "epoch": 4.42, "learning_rate": 1.4980730051110541e-05, "loss": 0.0557, "step": 4657, "task_loss": 0.21516427397727966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910580592869536, "compression_loss": 0.0, "distillation_loss": 0.03814719617366791, "epoch": 4.42, "learning_rate": 1.4970968855979455e-05, "loss": 0.046, "step": 4658, "task_loss": 0.11639110743999481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910651786661678, "compression_loss": 0.0, "distillation_loss": 0.06086570769548416, "epoch": 4.42, "learning_rate": 1.496120948267018e-05, "loss": 0.0621, "step": 4659, "task_loss": 0.072751984000206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910722942655152, "compression_loss": 0.0, "distillation_loss": 0.02917177602648735, "epoch": 4.43, "learning_rate": 1.4951451932955534e-05, "loss": 0.0277, "step": 4660, "task_loss": 0.014873206615447998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910794060859996, "compression_loss": 0.0, "distillation_loss": 0.05168045312166214, "epoch": 4.43, "learning_rate": 1.4941696208608056e-05, "loss": 0.0492, "step": 4661, "task_loss": 0.027083786204457283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910865141286245, "compression_loss": 0.0, "distillation_loss": 0.09697671234607697, "epoch": 4.43, "learning_rate": 1.4931942311399896e-05, "loss": 0.1167, "step": 4662, "task_loss": 0.29466700553894043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7910936183943937, "compression_loss": 0.0, "distillation_loss": 0.09114208817481995, "epoch": 4.43, "learning_rate": 1.4922190243102905e-05, "loss": 0.0878, "step": 4663, "task_loss": 0.057473860681056976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911007188843109, "compression_loss": 0.0, "distillation_loss": 0.05134554207324982, "epoch": 4.43, "learning_rate": 1.4912440005488593e-05, "loss": 0.0732, "step": 4664, "task_loss": 0.2698168158531189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911078155993797, "compression_loss": 0.0, "distillation_loss": 0.024719703942537308, "epoch": 4.43, "learning_rate": 1.4902691600328134e-05, "loss": 0.023, "step": 4665, "task_loss": 0.0074704308062791824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911149085406038, "compression_loss": 0.0, "distillation_loss": 0.11209557205438614, "epoch": 4.43, "learning_rate": 1.489294502939238e-05, "loss": 0.1162, "step": 4666, "task_loss": 0.15347597002983093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911219977089871, "compression_loss": 0.0, "distillation_loss": 0.08372636884450912, "epoch": 4.43, "learning_rate": 1.4883200294451832e-05, "loss": 0.0796, "step": 4667, "task_loss": 0.04254281520843506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911290831055329, "compression_loss": 0.0, "distillation_loss": 0.045971132814884186, "epoch": 4.43, "learning_rate": 1.4873457397276675e-05, "loss": 0.0433, "step": 4668, "task_loss": 0.0188460536301136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911361647312452, "compression_loss": 0.0, "distillation_loss": 0.012892801314592361, "epoch": 4.43, "learning_rate": 1.4863716339636746e-05, "loss": 0.0171, "step": 4669, "task_loss": 0.05512375757098198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911432425871274, "compression_loss": 0.0, "distillation_loss": 0.05050332099199295, "epoch": 4.43, "learning_rate": 1.4853977123301565e-05, "loss": 0.062, "step": 4670, "task_loss": 0.16584210097789764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911503166741836, "compression_loss": 0.0, "distillation_loss": 0.030305128544569016, "epoch": 4.44, "learning_rate": 1.4844239750040308e-05, "loss": 0.0305, "step": 4671, "task_loss": 0.0323617160320282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791157386993417, "compression_loss": 0.0, "distillation_loss": 0.06064460799098015, "epoch": 4.44, "learning_rate": 1.483450422162181e-05, "loss": 0.0614, "step": 4672, "task_loss": 0.06825106590986252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911644535458315, "compression_loss": 0.0, "distillation_loss": 0.040299929678440094, "epoch": 4.44, "learning_rate": 1.4824770539814575e-05, "loss": 0.0375, "step": 4673, "task_loss": 0.012086659669876099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911715163324309, "compression_loss": 0.0, "distillation_loss": 0.05591391772031784, "epoch": 4.44, "learning_rate": 1.4815038706386777e-05, "loss": 0.06, "step": 4674, "task_loss": 0.09698733687400818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911785753542188, "compression_loss": 0.0, "distillation_loss": 0.038244374096393585, "epoch": 4.44, "learning_rate": 1.4805308723106248e-05, "loss": 0.0403, "step": 4675, "task_loss": 0.0591559074819088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911856306121987, "compression_loss": 0.0, "distillation_loss": 0.0506531223654747, "epoch": 4.44, "learning_rate": 1.4795580591740493e-05, "loss": 0.0497, "step": 4676, "task_loss": 0.04083885997533798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911926821073746, "compression_loss": 0.0, "distillation_loss": 0.08757050335407257, "epoch": 4.44, "learning_rate": 1.4785854314056652e-05, "loss": 0.0904, "step": 4677, "task_loss": 0.1154625192284584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7911997298407498, "compression_loss": 0.0, "distillation_loss": 0.10649511963129044, "epoch": 4.44, "learning_rate": 1.4776129891821583e-05, "loss": 0.1165, "step": 4678, "task_loss": 0.2064911276102066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912067738133284, "compression_loss": 0.0, "distillation_loss": 0.06318517029285431, "epoch": 4.44, "learning_rate": 1.4766407326801751e-05, "loss": 0.0593, "step": 4679, "task_loss": 0.02464883401989937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912138140261138, "compression_loss": 0.0, "distillation_loss": 0.020300816744565964, "epoch": 4.44, "learning_rate": 1.4756686620763322e-05, "loss": 0.0197, "step": 4680, "task_loss": 0.013965649530291557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912208504801098, "compression_loss": 0.0, "distillation_loss": 0.01607617177069187, "epoch": 4.45, "learning_rate": 1.4746967775472093e-05, "loss": 0.0226, "step": 4681, "task_loss": 0.08178968727588654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79122788317632, "compression_loss": 0.0, "distillation_loss": 0.09621655941009521, "epoch": 4.45, "learning_rate": 1.4737250792693546e-05, "loss": 0.0925, "step": 4682, "task_loss": 0.05869336053729057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912349121157481, "compression_loss": 0.0, "distillation_loss": 0.02147580124437809, "epoch": 4.45, "learning_rate": 1.4727535674192825e-05, "loss": 0.0198, "step": 4683, "task_loss": 0.004298372194170952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912419372993978, "compression_loss": 0.0, "distillation_loss": 0.041834309697151184, "epoch": 4.45, "learning_rate": 1.4717822421734718e-05, "loss": 0.0438, "step": 4684, "task_loss": 0.06110651418566704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912489587282729, "compression_loss": 0.0, "distillation_loss": 0.045896559953689575, "epoch": 4.45, "learning_rate": 1.4708111037083683e-05, "loss": 0.0442, "step": 4685, "task_loss": 0.029097534716129303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912559764033769, "compression_loss": 0.0, "distillation_loss": 0.030547471717000008, "epoch": 4.45, "learning_rate": 1.4698401522003843e-05, "loss": 0.0281, "step": 4686, "task_loss": 0.006017416715621948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912629903257136, "compression_loss": 0.0, "distillation_loss": 0.022139865905046463, "epoch": 4.45, "learning_rate": 1.4688693878258991e-05, "loss": 0.0209, "step": 4687, "task_loss": 0.009355850517749786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912700004962866, "compression_loss": 0.0, "distillation_loss": 0.029008762910962105, "epoch": 4.45, "learning_rate": 1.4678988107612546e-05, "loss": 0.0366, "step": 4688, "task_loss": 0.10471072793006897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912770069160996, "compression_loss": 0.0, "distillation_loss": 0.019571123644709587, "epoch": 4.45, "learning_rate": 1.4669284211827622e-05, "loss": 0.0302, "step": 4689, "task_loss": 0.12538588047027588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912840095861563, "compression_loss": 0.0, "distillation_loss": 0.08734140545129776, "epoch": 4.45, "learning_rate": 1.4659582192666977e-05, "loss": 0.0883, "step": 4690, "task_loss": 0.09734951704740524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912910085074604, "compression_loss": 0.0, "distillation_loss": 0.021236347034573555, "epoch": 4.45, "learning_rate": 1.4649882051893022e-05, "loss": 0.0276, "step": 4691, "task_loss": 0.08498524129390717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7912980036810155, "compression_loss": 0.0, "distillation_loss": 0.07887224853038788, "epoch": 4.46, "learning_rate": 1.464018379126784e-05, "loss": 0.0804, "step": 4692, "task_loss": 0.09387266635894775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913049951078255, "compression_loss": 0.0, "distillation_loss": 0.06495728343725204, "epoch": 4.46, "learning_rate": 1.4630487412553168e-05, "loss": 0.0835, "step": 4693, "task_loss": 0.2504326403141022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913119827888938, "compression_loss": 0.0, "distillation_loss": 0.0405736081302166, "epoch": 4.46, "learning_rate": 1.4620792917510395e-05, "loss": 0.06, "step": 4694, "task_loss": 0.2343919277191162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913189667252243, "compression_loss": 0.0, "distillation_loss": 0.05520131438970566, "epoch": 4.46, "learning_rate": 1.4611100307900572e-05, "loss": 0.0586, "step": 4695, "task_loss": 0.08893582224845886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913259469178205, "compression_loss": 0.0, "distillation_loss": 0.05112463980913162, "epoch": 4.46, "learning_rate": 1.4601409585484413e-05, "loss": 0.0487, "step": 4696, "task_loss": 0.02662830613553524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913329233676862, "compression_loss": 0.0, "distillation_loss": 0.0339503176510334, "epoch": 4.46, "learning_rate": 1.4591720752022286e-05, "loss": 0.0313, "step": 4697, "task_loss": 0.007592087611556053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913398960758251, "compression_loss": 0.0, "distillation_loss": 0.01650651916861534, "epoch": 4.46, "learning_rate": 1.4582033809274215e-05, "loss": 0.0153, "step": 4698, "task_loss": 0.004532504826784134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913468650432408, "compression_loss": 0.0, "distillation_loss": 0.08176242560148239, "epoch": 4.46, "learning_rate": 1.4572348758999877e-05, "loss": 0.0844, "step": 4699, "task_loss": 0.10765102505683899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913538302709371, "compression_loss": 0.0, "distillation_loss": 0.036356210708618164, "epoch": 4.46, "learning_rate": 1.4562665602958592e-05, "loss": 0.0466, "step": 4700, "task_loss": 0.13854879140853882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913607917599175, "compression_loss": 0.0, "distillation_loss": 0.14445237815380096, "epoch": 4.46, "learning_rate": 1.4552984342909382e-05, "loss": 0.1413, "step": 4701, "task_loss": 0.11329984664916992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913677495111858, "compression_loss": 0.0, "distillation_loss": 0.015349620953202248, "epoch": 4.47, "learning_rate": 1.4543304980610878e-05, "loss": 0.0141, "step": 4702, "task_loss": 0.003350917249917984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913747035257457, "compression_loss": 0.0, "distillation_loss": 0.11373439431190491, "epoch": 4.47, "learning_rate": 1.4533627517821374e-05, "loss": 0.127, "step": 4703, "task_loss": 0.24672245979309082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913816538046009, "compression_loss": 0.0, "distillation_loss": 0.09408354014158249, "epoch": 4.47, "learning_rate": 1.452395195629884e-05, "loss": 0.0891, "step": 4704, "task_loss": 0.04441932588815689 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913886003487549, "compression_loss": 0.0, "distillation_loss": 0.09586971998214722, "epoch": 4.47, "learning_rate": 1.4514278297800893e-05, "loss": 0.0924, "step": 4705, "task_loss": 0.06134894862771034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7913955431592116, "compression_loss": 0.0, "distillation_loss": 0.09412407875061035, "epoch": 4.47, "learning_rate": 1.4504606544084798e-05, "loss": 0.0965, "step": 4706, "task_loss": 0.1174854040145874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914024822369746, "compression_loss": 0.0, "distillation_loss": 0.07334903627634048, "epoch": 4.47, "learning_rate": 1.4494936696907458e-05, "loss": 0.0821, "step": 4707, "task_loss": 0.1608203798532486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914094175830476, "compression_loss": 0.0, "distillation_loss": 0.023582283407449722, "epoch": 4.47, "learning_rate": 1.4485268758025466e-05, "loss": 0.0217, "step": 4708, "task_loss": 0.0046432409435510635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914163491984342, "compression_loss": 0.0, "distillation_loss": 0.06829246878623962, "epoch": 4.47, "learning_rate": 1.4475602729195048e-05, "loss": 0.0707, "step": 4709, "task_loss": 0.09261967241764069 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914232770841382, "compression_loss": 0.0, "distillation_loss": 0.05515692010521889, "epoch": 4.47, "learning_rate": 1.446593861217207e-05, "loss": 0.059, "step": 4710, "task_loss": 0.09347251802682877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914302012411631, "compression_loss": 0.0, "distillation_loss": 0.0656106099486351, "epoch": 4.47, "learning_rate": 1.4456276408712083e-05, "loss": 0.0618, "step": 4711, "task_loss": 0.027708284556865692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914371216705128, "compression_loss": 0.0, "distillation_loss": 0.02283991128206253, "epoch": 4.47, "learning_rate": 1.4446616120570258e-05, "loss": 0.0257, "step": 4712, "task_loss": 0.051784027367830276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914440383731909, "compression_loss": 0.0, "distillation_loss": 0.11995580047369003, "epoch": 4.48, "learning_rate": 1.443695774950145e-05, "loss": 0.1159, "step": 4713, "task_loss": 0.07978808879852295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791450951350201, "compression_loss": 0.0, "distillation_loss": 0.16006718575954437, "epoch": 4.48, "learning_rate": 1.4427301297260129e-05, "loss": 0.1553, "step": 4714, "task_loss": 0.1124170571565628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791457860602547, "compression_loss": 0.0, "distillation_loss": 0.09785357117652893, "epoch": 4.48, "learning_rate": 1.4417646765600457e-05, "loss": 0.0959, "step": 4715, "task_loss": 0.07808717340230942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914647661312323, "compression_loss": 0.0, "distillation_loss": 0.06697249412536621, "epoch": 4.48, "learning_rate": 1.4407994156276212e-05, "loss": 0.0734, "step": 4716, "task_loss": 0.131184920668602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914716679372608, "compression_loss": 0.0, "distillation_loss": 0.04526514932513237, "epoch": 4.48, "learning_rate": 1.4398343471040831e-05, "loss": 0.0416, "step": 4717, "task_loss": 0.009023293852806091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791478566021636, "compression_loss": 0.0, "distillation_loss": 0.03617861866950989, "epoch": 4.48, "learning_rate": 1.438869471164743e-05, "loss": 0.0367, "step": 4718, "task_loss": 0.04139818996191025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914854603853618, "compression_loss": 0.0, "distillation_loss": 0.013502972200512886, "epoch": 4.48, "learning_rate": 1.4379047879848736e-05, "loss": 0.0128, "step": 4719, "task_loss": 0.006139175966382027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914923510294417, "compression_loss": 0.0, "distillation_loss": 0.03151702880859375, "epoch": 4.48, "learning_rate": 1.4369402977397148e-05, "loss": 0.0295, "step": 4720, "task_loss": 0.011361634358763695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7914992379548794, "compression_loss": 0.0, "distillation_loss": 0.08465395122766495, "epoch": 4.48, "learning_rate": 1.4359760006044686e-05, "loss": 0.0813, "step": 4721, "task_loss": 0.051406532526016235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915061211626787, "compression_loss": 0.0, "distillation_loss": 0.02832719124853611, "epoch": 4.48, "learning_rate": 1.435011896754308e-05, "loss": 0.0412, "step": 4722, "task_loss": 0.15668340027332306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915130006538432, "compression_loss": 0.0, "distillation_loss": 0.05229802057147026, "epoch": 4.49, "learning_rate": 1.4340479863643658e-05, "loss": 0.0571, "step": 4723, "task_loss": 0.10068703442811966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915198764293766, "compression_loss": 0.0, "distillation_loss": 0.019345447421073914, "epoch": 4.49, "learning_rate": 1.4330842696097393e-05, "loss": 0.018, "step": 4724, "task_loss": 0.006194958463311195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915267484902825, "compression_loss": 0.0, "distillation_loss": 0.08192586153745651, "epoch": 4.49, "learning_rate": 1.4321207466654945e-05, "loss": 0.0811, "step": 4725, "task_loss": 0.07349298149347305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915336168375647, "compression_loss": 0.0, "distillation_loss": 0.12662634253501892, "epoch": 4.49, "learning_rate": 1.4311574177066594e-05, "loss": 0.1275, "step": 4726, "task_loss": 0.1349095106124878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915404814722269, "compression_loss": 0.0, "distillation_loss": 0.01997673325240612, "epoch": 4.49, "learning_rate": 1.4301942829082265e-05, "loss": 0.0185, "step": 4727, "task_loss": 0.005590014159679413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915473423952726, "compression_loss": 0.0, "distillation_loss": 0.025746600702404976, "epoch": 4.49, "learning_rate": 1.4292313424451536e-05, "loss": 0.024, "step": 4728, "task_loss": 0.007853610441088676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915541996077057, "compression_loss": 0.0, "distillation_loss": 0.03533206880092621, "epoch": 4.49, "learning_rate": 1.4282685964923642e-05, "loss": 0.0372, "step": 4729, "task_loss": 0.05367380380630493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915610531105297, "compression_loss": 0.0, "distillation_loss": 0.06636801362037659, "epoch": 4.49, "learning_rate": 1.427306045224747e-05, "loss": 0.0671, "step": 4730, "task_loss": 0.07363536953926086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915679029047484, "compression_loss": 0.0, "distillation_loss": 0.15005621314048767, "epoch": 4.49, "learning_rate": 1.4263436888171516e-05, "loss": 0.1441, "step": 4731, "task_loss": 0.0903296247124672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915747489913655, "compression_loss": 0.0, "distillation_loss": 0.14973436295986176, "epoch": 4.49, "learning_rate": 1.4253815274443965e-05, "loss": 0.1512, "step": 4732, "task_loss": 0.16403284668922424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915815913713846, "compression_loss": 0.0, "distillation_loss": 0.04621317982673645, "epoch": 4.49, "learning_rate": 1.424419561281263e-05, "loss": 0.0627, "step": 4733, "task_loss": 0.21088391542434692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915884300458095, "compression_loss": 0.0, "distillation_loss": 0.021405549719929695, "epoch": 4.5, "learning_rate": 1.423457790502496e-05, "loss": 0.0199, "step": 4734, "task_loss": 0.0061973873525857925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7915952650156436, "compression_loss": 0.0, "distillation_loss": 0.07480795681476593, "epoch": 4.5, "learning_rate": 1.4224962152828054e-05, "loss": 0.0678, "step": 4735, "task_loss": 0.004553038626909256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916020962818909, "compression_loss": 0.0, "distillation_loss": 0.023463796824216843, "epoch": 4.5, "learning_rate": 1.4215348357968669e-05, "loss": 0.0456, "step": 4736, "task_loss": 0.24516035616397858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916089238455549, "compression_loss": 0.0, "distillation_loss": 0.019841229543089867, "epoch": 4.5, "learning_rate": 1.4205736522193197e-05, "loss": 0.0224, "step": 4737, "task_loss": 0.0451948307454586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916157477076394, "compression_loss": 0.0, "distillation_loss": 0.11894679069519043, "epoch": 4.5, "learning_rate": 1.4196126647247654e-05, "loss": 0.1265, "step": 4738, "task_loss": 0.19399774074554443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916225678691481, "compression_loss": 0.0, "distillation_loss": 0.023783180862665176, "epoch": 4.5, "learning_rate": 1.4186518734877757e-05, "loss": 0.0354, "step": 4739, "task_loss": 0.13973920047283173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916293843310844, "compression_loss": 0.0, "distillation_loss": 0.04689887911081314, "epoch": 4.5, "learning_rate": 1.4176912786828808e-05, "loss": 0.0544, "step": 4740, "task_loss": 0.12143304944038391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916361970944523, "compression_loss": 0.0, "distillation_loss": 0.02539118006825447, "epoch": 4.5, "learning_rate": 1.4167308804845774e-05, "loss": 0.0285, "step": 4741, "task_loss": 0.05643775314092636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916430061602554, "compression_loss": 0.0, "distillation_loss": 0.04983173683285713, "epoch": 4.5, "learning_rate": 1.4157706790673262e-05, "loss": 0.0469, "step": 4742, "task_loss": 0.020453961566090584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916498115294973, "compression_loss": 0.0, "distillation_loss": 0.023538459092378616, "epoch": 4.5, "learning_rate": 1.4148106746055535e-05, "loss": 0.0218, "step": 4743, "task_loss": 0.0058768633753061295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916566132031818, "compression_loss": 0.0, "distillation_loss": 0.07629226893186569, "epoch": 4.51, "learning_rate": 1.4138508672736483e-05, "loss": 0.0728, "step": 4744, "task_loss": 0.04148370400071144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916634111823124, "compression_loss": 0.0, "distillation_loss": 0.049477558583021164, "epoch": 4.51, "learning_rate": 1.4128912572459629e-05, "loss": 0.0464, "step": 4745, "task_loss": 0.018751783296465874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791670205467893, "compression_loss": 0.0, "distillation_loss": 0.1168399229645729, "epoch": 4.51, "learning_rate": 1.4119318446968171e-05, "loss": 0.1262, "step": 4746, "task_loss": 0.21064327657222748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916769960609271, "compression_loss": 0.0, "distillation_loss": 0.03802032768726349, "epoch": 4.51, "learning_rate": 1.4109726298004911e-05, "loss": 0.0437, "step": 4747, "task_loss": 0.09514382481575012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916837829624186, "compression_loss": 0.0, "distillation_loss": 0.05071219056844711, "epoch": 4.51, "learning_rate": 1.4100136127312324e-05, "loss": 0.0538, "step": 4748, "task_loss": 0.0818718820810318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916905661733709, "compression_loss": 0.0, "distillation_loss": 0.05683235824108124, "epoch": 4.51, "learning_rate": 1.4090547936632494e-05, "loss": 0.0543, "step": 4749, "task_loss": 0.03133031725883484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7916973456947879, "compression_loss": 0.0, "distillation_loss": 0.02370285615324974, "epoch": 4.51, "learning_rate": 1.4080961727707184e-05, "loss": 0.0217, "step": 4750, "task_loss": 0.0038769226521253586 }, { "epoch": 4.51, "eval_accuracy": 0.8922018348623854, "eval_loss": 0.4650850296020508, "eval_runtime": 18.1377, "eval_samples_per_second": 48.077, "eval_steps_per_second": 6.01, "step": 4750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917041215276731, "compression_loss": 0.0, "distillation_loss": 0.02110632136464119, "epoch": 4.51, "learning_rate": 1.4071377502277764e-05, "loss": 0.0196, "step": 4751, "task_loss": 0.005888473242521286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917108936730304, "compression_loss": 0.0, "distillation_loss": 0.04167748987674713, "epoch": 4.51, "learning_rate": 1.4061795262085243e-05, "loss": 0.0383, "step": 4752, "task_loss": 0.007969718426465988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917176621318633, "compression_loss": 0.0, "distillation_loss": 0.03811153396964073, "epoch": 4.51, "learning_rate": 1.4052215008870299e-05, "loss": 0.0526, "step": 4753, "task_loss": 0.1829942911863327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917244269051756, "compression_loss": 0.0, "distillation_loss": 0.08471325784921646, "epoch": 4.51, "learning_rate": 1.4042636744373225e-05, "loss": 0.0854, "step": 4754, "task_loss": 0.09121614694595337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791731187993971, "compression_loss": 0.0, "distillation_loss": 0.08335433900356293, "epoch": 4.52, "learning_rate": 1.4033060470333948e-05, "loss": 0.0847, "step": 4755, "task_loss": 0.09658505022525787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791737945399253, "compression_loss": 0.0, "distillation_loss": 0.09752035140991211, "epoch": 4.52, "learning_rate": 1.4023486188492052e-05, "loss": 0.0917, "step": 4756, "task_loss": 0.03966911882162094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917446991220255, "compression_loss": 0.0, "distillation_loss": 0.05041617155075073, "epoch": 4.52, "learning_rate": 1.4013913900586767e-05, "loss": 0.0554, "step": 4757, "task_loss": 0.10072137415409088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791751449163292, "compression_loss": 0.0, "distillation_loss": 0.03461472690105438, "epoch": 4.52, "learning_rate": 1.4004343608356928e-05, "loss": 0.0325, "step": 4758, "task_loss": 0.013332528993487358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917581955240564, "compression_loss": 0.0, "distillation_loss": 0.11117596924304962, "epoch": 4.52, "learning_rate": 1.399477531354102e-05, "loss": 0.1109, "step": 4759, "task_loss": 0.10825280100107193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917649382053221, "compression_loss": 0.0, "distillation_loss": 0.12476976960897446, "epoch": 4.52, "learning_rate": 1.3985209017877189e-05, "loss": 0.1206, "step": 4760, "task_loss": 0.08343881368637085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917716772080929, "compression_loss": 0.0, "distillation_loss": 0.10303467512130737, "epoch": 4.52, "learning_rate": 1.3975644723103185e-05, "loss": 0.1027, "step": 4761, "task_loss": 0.09963302314281464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917784125333727, "compression_loss": 0.0, "distillation_loss": 0.10136422514915466, "epoch": 4.52, "learning_rate": 1.39660824309564e-05, "loss": 0.0952, "step": 4762, "task_loss": 0.03952625393867493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917851441821648, "compression_loss": 0.0, "distillation_loss": 0.04904685169458389, "epoch": 4.52, "learning_rate": 1.3956522143173894e-05, "loss": 0.0537, "step": 4763, "task_loss": 0.09588026255369186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917918721554732, "compression_loss": 0.0, "distillation_loss": 0.04798861965537071, "epoch": 4.52, "learning_rate": 1.3946963861492319e-05, "loss": 0.0518, "step": 4764, "task_loss": 0.08626765012741089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7917985964543014, "compression_loss": 0.0, "distillation_loss": 0.015419387258589268, "epoch": 4.53, "learning_rate": 1.3937407587648e-05, "loss": 0.0148, "step": 4765, "task_loss": 0.009636864066123962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918053170796532, "compression_loss": 0.0, "distillation_loss": 0.09505701810121536, "epoch": 4.53, "learning_rate": 1.3927853323376855e-05, "loss": 0.0958, "step": 4766, "task_loss": 0.10282032191753387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918120340325321, "compression_loss": 0.0, "distillation_loss": 0.07270844280719757, "epoch": 4.53, "learning_rate": 1.391830107041449e-05, "loss": 0.0779, "step": 4767, "task_loss": 0.12441182881593704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791818747313942, "compression_loss": 0.0, "distillation_loss": 0.1424996554851532, "epoch": 4.53, "learning_rate": 1.390875083049611e-05, "loss": 0.1441, "step": 4768, "task_loss": 0.15887659788131714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918254569248865, "compression_loss": 0.0, "distillation_loss": 0.020403150469064713, "epoch": 4.53, "learning_rate": 1.3899202605356542e-05, "loss": 0.0188, "step": 4769, "task_loss": 0.00390862300992012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918321628663691, "compression_loss": 0.0, "distillation_loss": 0.19000548124313354, "epoch": 4.53, "learning_rate": 1.38896563967303e-05, "loss": 0.1851, "step": 4770, "task_loss": 0.14091481268405914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918388651393938, "compression_loss": 0.0, "distillation_loss": 0.06574898958206177, "epoch": 4.53, "learning_rate": 1.3880112206351475e-05, "loss": 0.0741, "step": 4771, "task_loss": 0.14917248487472534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918455637449642, "compression_loss": 0.0, "distillation_loss": 0.21178287267684937, "epoch": 4.53, "learning_rate": 1.3870570035953811e-05, "loss": 0.2009, "step": 4772, "task_loss": 0.10322417318820953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918522586840838, "compression_loss": 0.0, "distillation_loss": 0.03703705966472626, "epoch": 4.53, "learning_rate": 1.3861029887270705e-05, "loss": 0.0342, "step": 4773, "task_loss": 0.008660474792122841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918589499577564, "compression_loss": 0.0, "distillation_loss": 0.06054367870092392, "epoch": 4.53, "learning_rate": 1.3851491762035173e-05, "loss": 0.0626, "step": 4774, "task_loss": 0.08063255250453949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918656375669858, "compression_loss": 0.0, "distillation_loss": 0.06310294568538666, "epoch": 4.53, "learning_rate": 1.3841955661979856e-05, "loss": 0.0585, "step": 4775, "task_loss": 0.017228560522198677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918723215127754, "compression_loss": 0.0, "distillation_loss": 0.05638519302010536, "epoch": 4.54, "learning_rate": 1.383242158883702e-05, "loss": 0.0603, "step": 4776, "task_loss": 0.09584720432758331 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918790017961291, "compression_loss": 0.0, "distillation_loss": 0.07800333946943283, "epoch": 4.54, "learning_rate": 1.3822889544338596e-05, "loss": 0.0848, "step": 4777, "task_loss": 0.14598438143730164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918856784180506, "compression_loss": 0.0, "distillation_loss": 0.03396276757121086, "epoch": 4.54, "learning_rate": 1.3813359530216113e-05, "loss": 0.0467, "step": 4778, "task_loss": 0.1610591858625412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918923513795434, "compression_loss": 0.0, "distillation_loss": 0.04296828433871269, "epoch": 4.54, "learning_rate": 1.3803831548200741e-05, "loss": 0.0402, "step": 4779, "task_loss": 0.014961333945393562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7918990206816113, "compression_loss": 0.0, "distillation_loss": 0.0331374853849411, "epoch": 4.54, "learning_rate": 1.3794305600023296e-05, "loss": 0.0358, "step": 4780, "task_loss": 0.05958189442753792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791905686325258, "compression_loss": 0.0, "distillation_loss": 0.017804235219955444, "epoch": 4.54, "learning_rate": 1.37847816874142e-05, "loss": 0.0163, "step": 4781, "task_loss": 0.0025765616446733475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919123483114872, "compression_loss": 0.0, "distillation_loss": 0.03256119042634964, "epoch": 4.54, "learning_rate": 1.377525981210353e-05, "loss": 0.0302, "step": 4782, "task_loss": 0.008887385949492455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919190066413024, "compression_loss": 0.0, "distillation_loss": 0.020645175129175186, "epoch": 4.54, "learning_rate": 1.3765739975820962e-05, "loss": 0.0188, "step": 4783, "task_loss": 0.0026280879974365234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919256613157075, "compression_loss": 0.0, "distillation_loss": 0.036273565143346786, "epoch": 4.54, "learning_rate": 1.3756222180295848e-05, "loss": 0.0377, "step": 4784, "task_loss": 0.05044960230588913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919323123357062, "compression_loss": 0.0, "distillation_loss": 0.027985138818621635, "epoch": 4.54, "learning_rate": 1.3746706427257122e-05, "loss": 0.0387, "step": 4785, "task_loss": 0.13473522663116455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.791938959702302, "compression_loss": 0.0, "distillation_loss": 0.0791940987110138, "epoch": 4.55, "learning_rate": 1.3737192718433362e-05, "loss": 0.0858, "step": 4786, "task_loss": 0.1456226408481598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919456034164986, "compression_loss": 0.0, "distillation_loss": 0.03578998148441315, "epoch": 4.55, "learning_rate": 1.3727681055552797e-05, "loss": 0.0327, "step": 4787, "task_loss": 0.004547275602817535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919522434792998, "compression_loss": 0.0, "distillation_loss": 0.044103555381298065, "epoch": 4.55, "learning_rate": 1.3718171440343253e-05, "loss": 0.0404, "step": 4788, "task_loss": 0.006830913946032524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919588798917092, "compression_loss": 0.0, "distillation_loss": 0.019968142732977867, "epoch": 4.55, "learning_rate": 1.3708663874532196e-05, "loss": 0.0184, "step": 4789, "task_loss": 0.0039040017873048782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919655126547306, "compression_loss": 0.0, "distillation_loss": 0.04486335813999176, "epoch": 4.55, "learning_rate": 1.369915835984672e-05, "loss": 0.0521, "step": 4790, "task_loss": 0.11712378263473511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919721417693675, "compression_loss": 0.0, "distillation_loss": 0.07928772270679474, "epoch": 4.55, "learning_rate": 1.3689654898013568e-05, "loss": 0.0866, "step": 4791, "task_loss": 0.15211215615272522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919787672366236, "compression_loss": 0.0, "distillation_loss": 0.08449915051460266, "epoch": 4.55, "learning_rate": 1.3680153490759073e-05, "loss": 0.0801, "step": 4792, "task_loss": 0.04078545793890953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919853890575028, "compression_loss": 0.0, "distillation_loss": 0.030160902068018913, "epoch": 4.55, "learning_rate": 1.3670654139809202e-05, "loss": 0.0471, "step": 4793, "task_loss": 0.19971755146980286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919920072330086, "compression_loss": 0.0, "distillation_loss": 0.07142870128154755, "epoch": 4.55, "learning_rate": 1.3661156846889584e-05, "loss": 0.067, "step": 4794, "task_loss": 0.027513636276125908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7919986217641447, "compression_loss": 0.0, "distillation_loss": 0.06955306231975555, "epoch": 4.55, "learning_rate": 1.3651661613725428e-05, "loss": 0.0697, "step": 4795, "task_loss": 0.07086768746376038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920052326519148, "compression_loss": 0.0, "distillation_loss": 0.02464466169476509, "epoch": 4.55, "learning_rate": 1.3642168442041586e-05, "loss": 0.028, "step": 4796, "task_loss": 0.05863853171467781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920118398973226, "compression_loss": 0.0, "distillation_loss": 0.04839901998639107, "epoch": 4.56, "learning_rate": 1.3632677333562557e-05, "loss": 0.0461, "step": 4797, "task_loss": 0.0250839926302433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920184435013717, "compression_loss": 0.0, "distillation_loss": 0.07862424850463867, "epoch": 4.56, "learning_rate": 1.3623188290012434e-05, "loss": 0.0786, "step": 4798, "task_loss": 0.0784490555524826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792025043465066, "compression_loss": 0.0, "distillation_loss": 0.1141967624425888, "epoch": 4.56, "learning_rate": 1.361370131311494e-05, "loss": 0.1161, "step": 4799, "task_loss": 0.133285790681839 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920316397894089, "compression_loss": 0.0, "distillation_loss": 0.027825910598039627, "epoch": 4.56, "learning_rate": 1.3604216404593442e-05, "loss": 0.0255, "step": 4800, "task_loss": 0.004920231178402901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920382324754043, "compression_loss": 0.0, "distillation_loss": 0.06682395190000534, "epoch": 4.56, "learning_rate": 1.3594733566170926e-05, "loss": 0.0627, "step": 4801, "task_loss": 0.025574171915650368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920448215240558, "compression_loss": 0.0, "distillation_loss": 0.07818278670310974, "epoch": 4.56, "learning_rate": 1.3585252799569987e-05, "loss": 0.0918, "step": 4802, "task_loss": 0.21402281522750854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792051406936367, "compression_loss": 0.0, "distillation_loss": 0.03720712661743164, "epoch": 4.56, "learning_rate": 1.357577410651284e-05, "loss": 0.0382, "step": 4803, "task_loss": 0.047226957976818085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920579887133417, "compression_loss": 0.0, "distillation_loss": 0.069380983710289, "epoch": 4.56, "learning_rate": 1.3566297488721352e-05, "loss": 0.0696, "step": 4804, "task_loss": 0.07113489508628845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920645668559835, "compression_loss": 0.0, "distillation_loss": 0.029062392190098763, "epoch": 4.56, "learning_rate": 1.3556822947916998e-05, "loss": 0.0366, "step": 4805, "task_loss": 0.10417832434177399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920711413652961, "compression_loss": 0.0, "distillation_loss": 0.024266686290502548, "epoch": 4.56, "learning_rate": 1.354735048582086e-05, "loss": 0.0301, "step": 4806, "task_loss": 0.08248142898082733 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920777122422833, "compression_loss": 0.0, "distillation_loss": 0.06912650167942047, "epoch": 4.57, "learning_rate": 1.3537880104153644e-05, "loss": 0.0707, "step": 4807, "task_loss": 0.0849725604057312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920842794879486, "compression_loss": 0.0, "distillation_loss": 0.016859427094459534, "epoch": 4.57, "learning_rate": 1.3528411804635732e-05, "loss": 0.0157, "step": 4808, "task_loss": 0.004863811656832695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920908431032958, "compression_loss": 0.0, "distillation_loss": 0.023333366960287094, "epoch": 4.57, "learning_rate": 1.3518945588987062e-05, "loss": 0.0282, "step": 4809, "task_loss": 0.07221191376447678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7920974030893286, "compression_loss": 0.0, "distillation_loss": 0.0138099305331707, "epoch": 4.57, "learning_rate": 1.3509481458927209e-05, "loss": 0.0365, "step": 4810, "task_loss": 0.24029265344142914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921039594470506, "compression_loss": 0.0, "distillation_loss": 0.0632215216755867, "epoch": 4.57, "learning_rate": 1.3500019416175396e-05, "loss": 0.0725, "step": 4811, "task_loss": 0.15620213747024536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921105121774655, "compression_loss": 0.0, "distillation_loss": 0.10874255001544952, "epoch": 4.57, "learning_rate": 1.3490559462450445e-05, "loss": 0.103, "step": 4812, "task_loss": 0.051314111799001694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792117061281577, "compression_loss": 0.0, "distillation_loss": 0.11337900161743164, "epoch": 4.57, "learning_rate": 1.3481101599470794e-05, "loss": 0.106, "step": 4813, "task_loss": 0.03977866470813751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921236067603888, "compression_loss": 0.0, "distillation_loss": 0.14787593483924866, "epoch": 4.57, "learning_rate": 1.3471645828954504e-05, "loss": 0.1417, "step": 4814, "task_loss": 0.0858084037899971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921301486149046, "compression_loss": 0.0, "distillation_loss": 0.07990266382694244, "epoch": 4.57, "learning_rate": 1.346219215261928e-05, "loss": 0.0812, "step": 4815, "task_loss": 0.09328167140483856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921366868461279, "compression_loss": 0.0, "distillation_loss": 0.029033560305833817, "epoch": 4.57, "learning_rate": 1.345274057218241e-05, "loss": 0.0336, "step": 4816, "task_loss": 0.07436929643154144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921432214550627, "compression_loss": 0.0, "distillation_loss": 0.0795164555311203, "epoch": 4.57, "learning_rate": 1.3443291089360827e-05, "loss": 0.0783, "step": 4817, "task_loss": 0.06710808724164963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921497524427125, "compression_loss": 0.0, "distillation_loss": 0.1971079707145691, "epoch": 4.58, "learning_rate": 1.3433843705871086e-05, "loss": 0.205, "step": 4818, "task_loss": 0.27602148056030273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921562798100809, "compression_loss": 0.0, "distillation_loss": 0.1079106554389, "epoch": 4.58, "learning_rate": 1.3424398423429335e-05, "loss": 0.1086, "step": 4819, "task_loss": 0.11473225057125092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921628035581716, "compression_loss": 0.0, "distillation_loss": 0.06394623965024948, "epoch": 4.58, "learning_rate": 1.3414955243751362e-05, "loss": 0.065, "step": 4820, "task_loss": 0.07409561425447464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921693236879885, "compression_loss": 0.0, "distillation_loss": 0.038549233227968216, "epoch": 4.58, "learning_rate": 1.3405514168552552e-05, "loss": 0.0359, "step": 4821, "task_loss": 0.011564519256353378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921758402005351, "compression_loss": 0.0, "distillation_loss": 0.035132892429828644, "epoch": 4.58, "learning_rate": 1.3396075199547944e-05, "loss": 0.0475, "step": 4822, "task_loss": 0.1591106653213501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921823530968151, "compression_loss": 0.0, "distillation_loss": 0.12006336450576782, "epoch": 4.58, "learning_rate": 1.3386638338452162e-05, "loss": 0.1215, "step": 4823, "task_loss": 0.13471020758152008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921888623778321, "compression_loss": 0.0, "distillation_loss": 0.11836139857769012, "epoch": 4.58, "learning_rate": 1.3377203586979444e-05, "loss": 0.1116, "step": 4824, "task_loss": 0.050610434263944626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7921953680445901, "compression_loss": 0.0, "distillation_loss": 0.03372761979699135, "epoch": 4.58, "learning_rate": 1.3367770946843671e-05, "loss": 0.0362, "step": 4825, "task_loss": 0.057958073914051056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922018700980924, "compression_loss": 0.0, "distillation_loss": 0.018163051456212997, "epoch": 4.58, "learning_rate": 1.3358340419758339e-05, "loss": 0.021, "step": 4826, "task_loss": 0.046153098344802856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792208368539343, "compression_loss": 0.0, "distillation_loss": 0.022957507520914078, "epoch": 4.58, "learning_rate": 1.3348912007436537e-05, "loss": 0.0211, "step": 4827, "task_loss": 0.004677103832364082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922148633693453, "compression_loss": 0.0, "distillation_loss": 0.09547284990549088, "epoch": 4.58, "learning_rate": 1.3339485711590965e-05, "loss": 0.0951, "step": 4828, "task_loss": 0.09193813055753708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922213545891031, "compression_loss": 0.0, "distillation_loss": 0.031036367639899254, "epoch": 4.59, "learning_rate": 1.3330061533933987e-05, "loss": 0.0287, "step": 4829, "task_loss": 0.008153628557920456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922278421996202, "compression_loss": 0.0, "distillation_loss": 0.06869910657405853, "epoch": 4.59, "learning_rate": 1.3320639476177533e-05, "loss": 0.0683, "step": 4830, "task_loss": 0.06460073590278625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922343262019, "compression_loss": 0.0, "distillation_loss": 0.02499844878911972, "epoch": 4.59, "learning_rate": 1.3311219540033156e-05, "loss": 0.023, "step": 4831, "task_loss": 0.004916973412036896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922408065969465, "compression_loss": 0.0, "distillation_loss": 0.04321729391813278, "epoch": 4.59, "learning_rate": 1.3301801727212054e-05, "loss": 0.0431, "step": 4832, "task_loss": 0.04201708361506462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922472833857632, "compression_loss": 0.0, "distillation_loss": 0.04636396840214729, "epoch": 4.59, "learning_rate": 1.3292386039424998e-05, "loss": 0.0444, "step": 4833, "task_loss": 0.026649996638298035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922537565693538, "compression_loss": 0.0, "distillation_loss": 0.08710770308971405, "epoch": 4.59, "learning_rate": 1.328297247838241e-05, "loss": 0.0833, "step": 4834, "task_loss": 0.04921592399477959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922602261487222, "compression_loss": 0.0, "distillation_loss": 0.05042930692434311, "epoch": 4.59, "learning_rate": 1.3273561045794294e-05, "loss": 0.0674, "step": 4835, "task_loss": 0.21977365016937256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922666921248717, "compression_loss": 0.0, "distillation_loss": 0.13431815803050995, "epoch": 4.59, "learning_rate": 1.3264151743370299e-05, "loss": 0.1476, "step": 4836, "task_loss": 0.2666419744491577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922731544988061, "compression_loss": 0.0, "distillation_loss": 0.10049565136432648, "epoch": 4.59, "learning_rate": 1.3254744572819658e-05, "loss": 0.1039, "step": 4837, "task_loss": 0.13440871238708496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922796132715293, "compression_loss": 0.0, "distillation_loss": 0.07015521079301834, "epoch": 4.59, "learning_rate": 1.324533953585122e-05, "loss": 0.0672, "step": 4838, "task_loss": 0.04096302390098572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922860684440447, "compression_loss": 0.0, "distillation_loss": 0.040450118482112885, "epoch": 4.6, "learning_rate": 1.323593663417348e-05, "loss": 0.0478, "step": 4839, "task_loss": 0.11380739510059357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922925200173562, "compression_loss": 0.0, "distillation_loss": 0.02944648265838623, "epoch": 4.6, "learning_rate": 1.3226535869494505e-05, "loss": 0.0349, "step": 4840, "task_loss": 0.08415282517671585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7922989679924674, "compression_loss": 0.0, "distillation_loss": 0.08658148348331451, "epoch": 4.6, "learning_rate": 1.3217137243521981e-05, "loss": 0.09, "step": 4841, "task_loss": 0.12077930569648743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792305412370382, "compression_loss": 0.0, "distillation_loss": 0.05245388299226761, "epoch": 4.6, "learning_rate": 1.3207740757963225e-05, "loss": 0.0538, "step": 4842, "task_loss": 0.06580542027950287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923118531521036, "compression_loss": 0.0, "distillation_loss": 0.06145579740405083, "epoch": 4.6, "learning_rate": 1.3198346414525162e-05, "loss": 0.0614, "step": 4843, "task_loss": 0.06057172268629074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792318290338636, "compression_loss": 0.0, "distillation_loss": 0.01584722474217415, "epoch": 4.6, "learning_rate": 1.318895421491431e-05, "loss": 0.0147, "step": 4844, "task_loss": 0.004009943455457687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923247239309827, "compression_loss": 0.0, "distillation_loss": 0.04391245171427727, "epoch": 4.6, "learning_rate": 1.3179564160836794e-05, "loss": 0.0507, "step": 4845, "task_loss": 0.11139274388551712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923311539301476, "compression_loss": 0.0, "distillation_loss": 0.03385882452130318, "epoch": 4.6, "learning_rate": 1.317017625399839e-05, "loss": 0.0316, "step": 4846, "task_loss": 0.011443352326750755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923375803371343, "compression_loss": 0.0, "distillation_loss": 0.029054662212729454, "epoch": 4.6, "learning_rate": 1.3160790496104441e-05, "loss": 0.0302, "step": 4847, "task_loss": 0.040480926632881165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923440031529465, "compression_loss": 0.0, "distillation_loss": 0.10286715626716614, "epoch": 4.6, "learning_rate": 1.3151406888859907e-05, "loss": 0.1156, "step": 4848, "task_loss": 0.2303019016981125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923504223785878, "compression_loss": 0.0, "distillation_loss": 0.08016088604927063, "epoch": 4.6, "learning_rate": 1.3142025433969384e-05, "loss": 0.0915, "step": 4849, "task_loss": 0.19339843094348907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792356838015062, "compression_loss": 0.0, "distillation_loss": 0.03166133165359497, "epoch": 4.61, "learning_rate": 1.3132646133137053e-05, "loss": 0.0361, "step": 4850, "task_loss": 0.07562405616044998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923632500633726, "compression_loss": 0.0, "distillation_loss": 0.026462262496352196, "epoch": 4.61, "learning_rate": 1.3123268988066695e-05, "loss": 0.045, "step": 4851, "task_loss": 0.211809903383255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923696585245235, "compression_loss": 0.0, "distillation_loss": 0.02861833944916725, "epoch": 4.61, "learning_rate": 1.3113894000461721e-05, "loss": 0.0266, "step": 4852, "task_loss": 0.007970165461301804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923760633995182, "compression_loss": 0.0, "distillation_loss": 0.07544773817062378, "epoch": 4.61, "learning_rate": 1.3104521172025158e-05, "loss": 0.0724, "step": 4853, "task_loss": 0.04472661018371582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923824646893605, "compression_loss": 0.0, "distillation_loss": 0.06193629652261734, "epoch": 4.61, "learning_rate": 1.3095150504459614e-05, "loss": 0.0598, "step": 4854, "task_loss": 0.04085727035999298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792388862395054, "compression_loss": 0.0, "distillation_loss": 0.03407544270157814, "epoch": 4.61, "learning_rate": 1.3085781999467303e-05, "loss": 0.044, "step": 4855, "task_loss": 0.13290613889694214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7923952565176026, "compression_loss": 0.0, "distillation_loss": 0.15552052855491638, "epoch": 4.61, "learning_rate": 1.3076415658750083e-05, "loss": 0.1622, "step": 4856, "task_loss": 0.2220618724822998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924016470580096, "compression_loss": 0.0, "distillation_loss": 0.0764533281326294, "epoch": 4.61, "learning_rate": 1.3067051484009383e-05, "loss": 0.0794, "step": 4857, "task_loss": 0.10618744790554047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792408034017279, "compression_loss": 0.0, "distillation_loss": 0.08214154094457626, "epoch": 4.61, "learning_rate": 1.3057689476946238e-05, "loss": 0.0788, "step": 4858, "task_loss": 0.0489237904548645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924144173964144, "compression_loss": 0.0, "distillation_loss": 0.18950366973876953, "epoch": 4.61, "learning_rate": 1.304832963926132e-05, "loss": 0.1862, "step": 4859, "task_loss": 0.15664705634117126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924207971964193, "compression_loss": 0.0, "distillation_loss": 0.062167249619960785, "epoch": 4.62, "learning_rate": 1.303897197265489e-05, "loss": 0.0726, "step": 4860, "task_loss": 0.16604240238666534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924271734182977, "compression_loss": 0.0, "distillation_loss": 0.02140945754945278, "epoch": 4.62, "learning_rate": 1.3029616478826805e-05, "loss": 0.0202, "step": 4861, "task_loss": 0.009080074727535248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792433546063053, "compression_loss": 0.0, "distillation_loss": 0.020239079371094704, "epoch": 4.62, "learning_rate": 1.3020263159476526e-05, "loss": 0.0187, "step": 4862, "task_loss": 0.004914524033665657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924399151316892, "compression_loss": 0.0, "distillation_loss": 0.03199296444654465, "epoch": 4.62, "learning_rate": 1.301091201630315e-05, "loss": 0.0295, "step": 4863, "task_loss": 0.0070999301970005035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924462806252096, "compression_loss": 0.0, "distillation_loss": 0.05161367356777191, "epoch": 4.62, "learning_rate": 1.3001563051005347e-05, "loss": 0.0562, "step": 4864, "task_loss": 0.09769769012928009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924526425446181, "compression_loss": 0.0, "distillation_loss": 0.015561849810183048, "epoch": 4.62, "learning_rate": 1.2992216265281393e-05, "loss": 0.0211, "step": 4865, "task_loss": 0.07106789946556091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924590008909185, "compression_loss": 0.0, "distillation_loss": 0.03964412212371826, "epoch": 4.62, "learning_rate": 1.2982871660829191e-05, "loss": 0.0413, "step": 4866, "task_loss": 0.05661403387784958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924653556651141, "compression_loss": 0.0, "distillation_loss": 0.030754052102565765, "epoch": 4.62, "learning_rate": 1.2973529239346227e-05, "loss": 0.0281, "step": 4867, "task_loss": 0.004459032788872719 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924717068682089, "compression_loss": 0.0, "distillation_loss": 0.029556550085544586, "epoch": 4.62, "learning_rate": 1.2964189002529586e-05, "loss": 0.0295, "step": 4868, "task_loss": 0.028598371893167496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924780545012066, "compression_loss": 0.0, "distillation_loss": 0.022967040538787842, "epoch": 4.62, "learning_rate": 1.2954850952075982e-05, "loss": 0.0212, "step": 4869, "task_loss": 0.004923565313220024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924843985651107, "compression_loss": 0.0, "distillation_loss": 0.07921673357486725, "epoch": 4.62, "learning_rate": 1.294551508968172e-05, "loss": 0.0833, "step": 4870, "task_loss": 0.11980371177196503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7924907390609249, "compression_loss": 0.0, "distillation_loss": 0.0501846969127655, "epoch": 4.63, "learning_rate": 1.2936181417042697e-05, "loss": 0.0469, "step": 4871, "task_loss": 0.01730802096426487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792497075989653, "compression_loss": 0.0, "distillation_loss": 0.02974601648747921, "epoch": 4.63, "learning_rate": 1.2926849935854413e-05, "loss": 0.0274, "step": 4872, "task_loss": 0.006090117618441582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925034093522986, "compression_loss": 0.0, "distillation_loss": 0.02171606570482254, "epoch": 4.63, "learning_rate": 1.2917520647811987e-05, "loss": 0.0268, "step": 4873, "task_loss": 0.07253267616033554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925097391498654, "compression_loss": 0.0, "distillation_loss": 0.022985346615314484, "epoch": 4.63, "learning_rate": 1.2908193554610128e-05, "loss": 0.0239, "step": 4874, "task_loss": 0.03176088258624077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925160653833572, "compression_loss": 0.0, "distillation_loss": 0.029063984751701355, "epoch": 4.63, "learning_rate": 1.2898868657943137e-05, "loss": 0.0273, "step": 4875, "task_loss": 0.010947054252028465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925223880537774, "compression_loss": 0.0, "distillation_loss": 0.03057437390089035, "epoch": 4.63, "learning_rate": 1.2889545959504939e-05, "loss": 0.0359, "step": 4876, "task_loss": 0.08393832296133041 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79252870716213, "compression_loss": 0.0, "distillation_loss": 0.0298734400421381, "epoch": 4.63, "learning_rate": 1.2880225460989038e-05, "loss": 0.0332, "step": 4877, "task_loss": 0.0632658377289772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925350227094184, "compression_loss": 0.0, "distillation_loss": 0.0205811969935894, "epoch": 4.63, "learning_rate": 1.2870907164088557e-05, "loss": 0.0191, "step": 4878, "task_loss": 0.005966978147625923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925413346966464, "compression_loss": 0.0, "distillation_loss": 0.016166334971785545, "epoch": 4.63, "learning_rate": 1.2861591070496193e-05, "loss": 0.0274, "step": 4879, "task_loss": 0.1287321150302887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925476431248178, "compression_loss": 0.0, "distillation_loss": 0.03941744565963745, "epoch": 4.63, "learning_rate": 1.2852277181904282e-05, "loss": 0.0516, "step": 4880, "task_loss": 0.16162440180778503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925539479949362, "compression_loss": 0.0, "distillation_loss": 0.06537621468305588, "epoch": 4.64, "learning_rate": 1.2842965500004728e-05, "loss": 0.0704, "step": 4881, "task_loss": 0.11553017050027847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925602493080053, "compression_loss": 0.0, "distillation_loss": 0.036567624658346176, "epoch": 4.64, "learning_rate": 1.2833656026489028e-05, "loss": 0.034, "step": 4882, "task_loss": 0.010592048987746239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925665470650286, "compression_loss": 0.0, "distillation_loss": 0.022438403218984604, "epoch": 4.64, "learning_rate": 1.282434876304831e-05, "loss": 0.0208, "step": 4883, "task_loss": 0.006338924169540405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79257284126701, "compression_loss": 0.0, "distillation_loss": 0.07297282665967941, "epoch": 4.64, "learning_rate": 1.2815043711373285e-05, "loss": 0.0853, "step": 4884, "task_loss": 0.19581279158592224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925791319149531, "compression_loss": 0.0, "distillation_loss": 0.036048613488674164, "epoch": 4.64, "learning_rate": 1.2805740873154237e-05, "loss": 0.0621, "step": 4885, "task_loss": 0.29630377888679504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925854190098616, "compression_loss": 0.0, "distillation_loss": 0.055907703936100006, "epoch": 4.64, "learning_rate": 1.2796440250081093e-05, "loss": 0.0643, "step": 4886, "task_loss": 0.13965827226638794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925917025527391, "compression_loss": 0.0, "distillation_loss": 0.04197829216718674, "epoch": 4.64, "learning_rate": 1.2787141843843359e-05, "loss": 0.0383, "step": 4887, "task_loss": 0.0054167453199625015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7925979825445895, "compression_loss": 0.0, "distillation_loss": 0.029558390378952026, "epoch": 4.64, "learning_rate": 1.2777845656130122e-05, "loss": 0.0272, "step": 4888, "task_loss": 0.006031878292560577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926042589864162, "compression_loss": 0.0, "distillation_loss": 0.04860628768801689, "epoch": 4.64, "learning_rate": 1.276855168863008e-05, "loss": 0.0644, "step": 4889, "task_loss": 0.20697267353534698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792610531879223, "compression_loss": 0.0, "distillation_loss": 0.051894620060920715, "epoch": 4.64, "learning_rate": 1.2759259943031538e-05, "loss": 0.057, "step": 4890, "task_loss": 0.10261577367782593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926168012240138, "compression_loss": 0.0, "distillation_loss": 0.027323635295033455, "epoch": 4.64, "learning_rate": 1.2749970421022381e-05, "loss": 0.0323, "step": 4891, "task_loss": 0.07732345908880234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792623067021792, "compression_loss": 0.0, "distillation_loss": 0.05105939507484436, "epoch": 4.65, "learning_rate": 1.2740683124290081e-05, "loss": 0.0573, "step": 4892, "task_loss": 0.11377457529306412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926293292735612, "compression_loss": 0.0, "distillation_loss": 0.014040287584066391, "epoch": 4.65, "learning_rate": 1.2731398054521748e-05, "loss": 0.0218, "step": 4893, "task_loss": 0.09142255038022995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926355879803254, "compression_loss": 0.0, "distillation_loss": 0.0386216938495636, "epoch": 4.65, "learning_rate": 1.2722115213404031e-05, "loss": 0.0425, "step": 4894, "task_loss": 0.07720839232206345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926418431430882, "compression_loss": 0.0, "distillation_loss": 0.06361688673496246, "epoch": 4.65, "learning_rate": 1.2712834602623228e-05, "loss": 0.0605, "step": 4895, "task_loss": 0.03238803148269653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792648094762853, "compression_loss": 0.0, "distillation_loss": 0.02098456397652626, "epoch": 4.65, "learning_rate": 1.2703556223865181e-05, "loss": 0.0331, "step": 4896, "task_loss": 0.14170801639556885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926543428406239, "compression_loss": 0.0, "distillation_loss": 0.06998811662197113, "epoch": 4.65, "learning_rate": 1.2694280078815382e-05, "loss": 0.0669, "step": 4897, "task_loss": 0.03889083489775658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926605873774042, "compression_loss": 0.0, "distillation_loss": 0.03850919380784035, "epoch": 4.65, "learning_rate": 1.2685006169158869e-05, "loss": 0.0413, "step": 4898, "task_loss": 0.06683476269245148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926668283741979, "compression_loss": 0.0, "distillation_loss": 0.15333296358585358, "epoch": 4.65, "learning_rate": 1.2675734496580285e-05, "loss": 0.1567, "step": 4899, "task_loss": 0.18686996400356293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926730658320085, "compression_loss": 0.0, "distillation_loss": 0.12471291422843933, "epoch": 4.65, "learning_rate": 1.2666465062763894e-05, "loss": 0.1294, "step": 4900, "task_loss": 0.17144428193569183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926792997518396, "compression_loss": 0.0, "distillation_loss": 0.11362001299858093, "epoch": 4.65, "learning_rate": 1.2657197869393523e-05, "loss": 0.1113, "step": 4901, "task_loss": 0.09047464281320572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926855301346951, "compression_loss": 0.0, "distillation_loss": 0.029277116060256958, "epoch": 4.66, "learning_rate": 1.2647932918152606e-05, "loss": 0.0279, "step": 4902, "task_loss": 0.015509987249970436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926917569815787, "compression_loss": 0.0, "distillation_loss": 0.0541316494345665, "epoch": 4.66, "learning_rate": 1.2638670210724138e-05, "loss": 0.0605, "step": 4903, "task_loss": 0.11828672140836716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7926979802934938, "compression_loss": 0.0, "distillation_loss": 0.038249123841524124, "epoch": 4.66, "learning_rate": 1.2629409748790782e-05, "loss": 0.0403, "step": 4904, "task_loss": 0.058266542851924896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927042000714443, "compression_loss": 0.0, "distillation_loss": 0.055571284145116806, "epoch": 4.66, "learning_rate": 1.262015153403472e-05, "loss": 0.0609, "step": 4905, "task_loss": 0.10911326855421066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927104163164338, "compression_loss": 0.0, "distillation_loss": 0.0703677237033844, "epoch": 4.66, "learning_rate": 1.2610895568137754e-05, "loss": 0.0799, "step": 4906, "task_loss": 0.1653900146484375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927166290294662, "compression_loss": 0.0, "distillation_loss": 0.04402827471494675, "epoch": 4.66, "learning_rate": 1.2601641852781265e-05, "loss": 0.0499, "step": 4907, "task_loss": 0.1026904359459877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927228382115448, "compression_loss": 0.0, "distillation_loss": 0.0630364716053009, "epoch": 4.66, "learning_rate": 1.2592390389646258e-05, "loss": 0.0653, "step": 4908, "task_loss": 0.08615773171186447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927290438636735, "compression_loss": 0.0, "distillation_loss": 0.1884252429008484, "epoch": 4.66, "learning_rate": 1.2583141180413288e-05, "loss": 0.1818, "step": 4909, "task_loss": 0.12265457212924957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792735245986856, "compression_loss": 0.0, "distillation_loss": 0.030301768332719803, "epoch": 4.66, "learning_rate": 1.2573894226762518e-05, "loss": 0.03, "step": 4910, "task_loss": 0.02761382982134819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927414445820959, "compression_loss": 0.0, "distillation_loss": 0.08405353128910065, "epoch": 4.66, "learning_rate": 1.25646495303737e-05, "loss": 0.0817, "step": 4911, "task_loss": 0.06076852232217789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792747639650397, "compression_loss": 0.0, "distillation_loss": 0.02579943835735321, "epoch": 4.66, "learning_rate": 1.2555407092926197e-05, "loss": 0.0374, "step": 4912, "task_loss": 0.14211821556091309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927538311927629, "compression_loss": 0.0, "distillation_loss": 0.07202491909265518, "epoch": 4.67, "learning_rate": 1.2546166916098928e-05, "loss": 0.0744, "step": 4913, "task_loss": 0.09626448899507523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927600192101972, "compression_loss": 0.0, "distillation_loss": 0.04386778920888901, "epoch": 4.67, "learning_rate": 1.253692900157041e-05, "loss": 0.0428, "step": 4914, "task_loss": 0.03336333483457565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927662037037038, "compression_loss": 0.0, "distillation_loss": 0.05689205601811409, "epoch": 4.67, "learning_rate": 1.252769335101877e-05, "loss": 0.0683, "step": 4915, "task_loss": 0.17090968787670135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927723846742861, "compression_loss": 0.0, "distillation_loss": 0.1544632911682129, "epoch": 4.67, "learning_rate": 1.25184599661217e-05, "loss": 0.1538, "step": 4916, "task_loss": 0.14771823585033417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927785621229481, "compression_loss": 0.0, "distillation_loss": 0.015177516266703606, "epoch": 4.67, "learning_rate": 1.2509228848556482e-05, "loss": 0.0142, "step": 4917, "task_loss": 0.005492331460118294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927847360506932, "compression_loss": 0.0, "distillation_loss": 0.039150480180978775, "epoch": 4.67, "learning_rate": 1.2500000000000006e-05, "loss": 0.0361, "step": 4918, "task_loss": 0.00842754915356636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927909064585252, "compression_loss": 0.0, "distillation_loss": 0.024652540683746338, "epoch": 4.67, "learning_rate": 1.2490773422128732e-05, "loss": 0.0228, "step": 4919, "task_loss": 0.005637306720018387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7927970733474479, "compression_loss": 0.0, "distillation_loss": 0.04846763238310814, "epoch": 4.67, "learning_rate": 1.2481549116618698e-05, "loss": 0.0448, "step": 4920, "task_loss": 0.011590449139475822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928032367184648, "compression_loss": 0.0, "distillation_loss": 0.014604704454541206, "epoch": 4.67, "learning_rate": 1.247232708514556e-05, "loss": 0.0136, "step": 4921, "task_loss": 0.004115687683224678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928093965725795, "compression_loss": 0.0, "distillation_loss": 0.027096299454569817, "epoch": 4.67, "learning_rate": 1.2463107329384552e-05, "loss": 0.0252, "step": 4922, "task_loss": 0.008570542559027672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.792815552910796, "compression_loss": 0.0, "distillation_loss": 0.023527517914772034, "epoch": 4.68, "learning_rate": 1.2453889851010473e-05, "loss": 0.0217, "step": 4923, "task_loss": 0.00551653653383255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928217057341178, "compression_loss": 0.0, "distillation_loss": 0.07601621747016907, "epoch": 4.68, "learning_rate": 1.2444674651697716e-05, "loss": 0.0799, "step": 4924, "task_loss": 0.1144181340932846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928278550435486, "compression_loss": 0.0, "distillation_loss": 0.02135540172457695, "epoch": 4.68, "learning_rate": 1.2435461733120287e-05, "loss": 0.0337, "step": 4925, "task_loss": 0.1447429358959198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928340008400919, "compression_loss": 0.0, "distillation_loss": 0.0660846009850502, "epoch": 4.68, "learning_rate": 1.2426251096951744e-05, "loss": 0.064, "step": 4926, "task_loss": 0.04477040097117424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928401431247517, "compression_loss": 0.0, "distillation_loss": 0.037703558802604675, "epoch": 4.68, "learning_rate": 1.2417042744865237e-05, "loss": 0.0531, "step": 4927, "task_loss": 0.19128406047821045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928462818985316, "compression_loss": 0.0, "distillation_loss": 0.024694714695215225, "epoch": 4.68, "learning_rate": 1.2407836678533523e-05, "loss": 0.0321, "step": 4928, "task_loss": 0.09900853782892227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928524171624352, "compression_loss": 0.0, "distillation_loss": 0.05749480798840523, "epoch": 4.68, "learning_rate": 1.2398632899628912e-05, "loss": 0.054, "step": 4929, "task_loss": 0.022426774725317955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928585489174661, "compression_loss": 0.0, "distillation_loss": 0.029665688052773476, "epoch": 4.68, "learning_rate": 1.2389431409823336e-05, "loss": 0.0283, "step": 4930, "task_loss": 0.01619105413556099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928646771646282, "compression_loss": 0.0, "distillation_loss": 0.031163331121206284, "epoch": 4.68, "learning_rate": 1.2380232210788265e-05, "loss": 0.0326, "step": 4931, "task_loss": 0.045856185257434845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928708019049249, "compression_loss": 0.0, "distillation_loss": 0.08584082871675491, "epoch": 4.68, "learning_rate": 1.23710353041948e-05, "loss": 0.0879, "step": 4932, "task_loss": 0.10655449330806732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928769231393602, "compression_loss": 0.0, "distillation_loss": 0.11349561810493469, "epoch": 4.68, "learning_rate": 1.2361840691713595e-05, "loss": 0.1096, "step": 4933, "task_loss": 0.07412466406822205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928830408689376, "compression_loss": 0.0, "distillation_loss": 0.0716467946767807, "epoch": 4.69, "learning_rate": 1.2352648375014883e-05, "loss": 0.0791, "step": 4934, "task_loss": 0.14578455686569214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928891550946607, "compression_loss": 0.0, "distillation_loss": 0.03932729363441467, "epoch": 4.69, "learning_rate": 1.2343458355768513e-05, "loss": 0.0449, "step": 4935, "task_loss": 0.09523576498031616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7928952658175334, "compression_loss": 0.0, "distillation_loss": 0.06750982254743576, "epoch": 4.69, "learning_rate": 1.233427063564389e-05, "loss": 0.0789, "step": 4936, "task_loss": 0.18135161697864532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929013730385593, "compression_loss": 0.0, "distillation_loss": 0.05587318539619446, "epoch": 4.69, "learning_rate": 1.2325085216309994e-05, "loss": 0.066, "step": 4937, "task_loss": 0.156914621591568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929074767587421, "compression_loss": 0.0, "distillation_loss": 0.044295355677604675, "epoch": 4.69, "learning_rate": 1.231590209943541e-05, "loss": 0.0426, "step": 4938, "task_loss": 0.02747635915875435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929135769790853, "compression_loss": 0.0, "distillation_loss": 0.010653965175151825, "epoch": 4.69, "learning_rate": 1.230672128668831e-05, "loss": 0.0163, "step": 4939, "task_loss": 0.06707858294248581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929196737005928, "compression_loss": 0.0, "distillation_loss": 0.10607406497001648, "epoch": 4.69, "learning_rate": 1.2297542779736417e-05, "loss": 0.1061, "step": 4940, "task_loss": 0.10609476268291473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929257669242682, "compression_loss": 0.0, "distillation_loss": 0.025449592620134354, "epoch": 4.69, "learning_rate": 1.2288366580247047e-05, "loss": 0.0324, "step": 4941, "task_loss": 0.09515959024429321 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929318566511152, "compression_loss": 0.0, "distillation_loss": 0.042621444910764694, "epoch": 4.69, "learning_rate": 1.2279192689887115e-05, "loss": 0.0455, "step": 4942, "task_loss": 0.07145626842975616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929379428821375, "compression_loss": 0.0, "distillation_loss": 0.01993633061647415, "epoch": 4.69, "learning_rate": 1.2270021110323096e-05, "loss": 0.0186, "step": 4943, "task_loss": 0.00634993240237236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929440256183387, "compression_loss": 0.0, "distillation_loss": 0.014678630977869034, "epoch": 4.7, "learning_rate": 1.2260851843221039e-05, "loss": 0.0236, "step": 4944, "task_loss": 0.10428041964769363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929501048607225, "compression_loss": 0.0, "distillation_loss": 0.02224035933613777, "epoch": 4.7, "learning_rate": 1.225168489024661e-05, "loss": 0.0263, "step": 4945, "task_loss": 0.0633140280842781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929561806102927, "compression_loss": 0.0, "distillation_loss": 0.0771671012043953, "epoch": 4.7, "learning_rate": 1.2242520253065004e-05, "loss": 0.0854, "step": 4946, "task_loss": 0.15969133377075195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929622528680528, "compression_loss": 0.0, "distillation_loss": 0.012984257191419601, "epoch": 4.7, "learning_rate": 1.2233357933341047e-05, "loss": 0.0249, "step": 4947, "task_loss": 0.13174743950366974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929683216350066, "compression_loss": 0.0, "distillation_loss": 0.013885335996747017, "epoch": 4.7, "learning_rate": 1.2224197932739096e-05, "loss": 0.0128, "step": 4948, "task_loss": 0.0031508170068264008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929743869121578, "compression_loss": 0.0, "distillation_loss": 0.01262105256319046, "epoch": 4.7, "learning_rate": 1.2215040252923127e-05, "loss": 0.021, "step": 4949, "task_loss": 0.09617089480161667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929804487005101, "compression_loss": 0.0, "distillation_loss": 0.07396592199802399, "epoch": 4.7, "learning_rate": 1.2205884895556672e-05, "loss": 0.0722, "step": 4950, "task_loss": 0.05672897398471832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929865070010671, "compression_loss": 0.0, "distillation_loss": 0.019163163378834724, "epoch": 4.7, "learning_rate": 1.219673186230283e-05, "loss": 0.018, "step": 4951, "task_loss": 0.007804272696375847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7929925618148325, "compression_loss": 0.0, "distillation_loss": 0.06364789605140686, "epoch": 4.7, "learning_rate": 1.2187581154824317e-05, "loss": 0.0665, "step": 4952, "task_loss": 0.09249377250671387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79299861314281, "compression_loss": 0.0, "distillation_loss": 0.09652909636497498, "epoch": 4.7, "learning_rate": 1.2178432774783394e-05, "loss": 0.0985, "step": 4953, "task_loss": 0.11579639464616776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930046609860032, "compression_loss": 0.0, "distillation_loss": 0.0369391068816185, "epoch": 4.7, "learning_rate": 1.2169286723841897e-05, "loss": 0.0342, "step": 4954, "task_loss": 0.009912891313433647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930107053454158, "compression_loss": 0.0, "distillation_loss": 0.03373286873102188, "epoch": 4.71, "learning_rate": 1.216014300366126e-05, "loss": 0.0389, "step": 4955, "task_loss": 0.08519351482391357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930167462220518, "compression_loss": 0.0, "distillation_loss": 0.024067046120762825, "epoch": 4.71, "learning_rate": 1.2151001615902493e-05, "loss": 0.032, "step": 4956, "task_loss": 0.10371088981628418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930227836169144, "compression_loss": 0.0, "distillation_loss": 0.14827989041805267, "epoch": 4.71, "learning_rate": 1.2141862562226165e-05, "loss": 0.1495, "step": 4957, "task_loss": 0.15999506413936615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930288175310075, "compression_loss": 0.0, "distillation_loss": 0.04894055798649788, "epoch": 4.71, "learning_rate": 1.2132725844292416e-05, "loss": 0.0561, "step": 4958, "task_loss": 0.12008975446224213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930348479653349, "compression_loss": 0.0, "distillation_loss": 0.08717606216669083, "epoch": 4.71, "learning_rate": 1.2123591463760997e-05, "loss": 0.0892, "step": 4959, "task_loss": 0.10745367407798767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930408749209001, "compression_loss": 0.0, "distillation_loss": 0.030964989215135574, "epoch": 4.71, "learning_rate": 1.2114459422291205e-05, "loss": 0.0285, "step": 4960, "task_loss": 0.006043644621968269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930468983987069, "compression_loss": 0.0, "distillation_loss": 0.021385207772254944, "epoch": 4.71, "learning_rate": 1.2105329721541903e-05, "loss": 0.03, "step": 4961, "task_loss": 0.10744792968034744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930529183997588, "compression_loss": 0.0, "distillation_loss": 0.1010364517569542, "epoch": 4.71, "learning_rate": 1.2096202363171571e-05, "loss": 0.0969, "step": 4962, "task_loss": 0.05933345481753349 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930589349250597, "compression_loss": 0.0, "distillation_loss": 0.0767788365483284, "epoch": 4.71, "learning_rate": 1.2087077348838214e-05, "loss": 0.0806, "step": 4963, "task_loss": 0.11461080610752106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930649479756132, "compression_loss": 0.0, "distillation_loss": 0.12463347613811493, "epoch": 4.71, "learning_rate": 1.2077954680199455e-05, "loss": 0.1252, "step": 4964, "task_loss": 0.12991327047348022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793070957552423, "compression_loss": 0.0, "distillation_loss": 0.02803153544664383, "epoch": 4.72, "learning_rate": 1.2068834358912454e-05, "loss": 0.0265, "step": 4965, "task_loss": 0.012602541595697403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930769636564927, "compression_loss": 0.0, "distillation_loss": 0.036852188408374786, "epoch": 4.72, "learning_rate": 1.2059716386633977e-05, "loss": 0.0409, "step": 4966, "task_loss": 0.07686673104763031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930829662888261, "compression_loss": 0.0, "distillation_loss": 0.01830023154616356, "epoch": 4.72, "learning_rate": 1.205060076502034e-05, "loss": 0.0173, "step": 4967, "task_loss": 0.00809447094798088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930889654504267, "compression_loss": 0.0, "distillation_loss": 0.22687265276908875, "epoch": 4.72, "learning_rate": 1.204148749572743e-05, "loss": 0.2179, "step": 4968, "task_loss": 0.136878103017807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7930949611422985, "compression_loss": 0.0, "distillation_loss": 0.030822109431028366, "epoch": 4.72, "learning_rate": 1.2032376580410731e-05, "loss": 0.0291, "step": 4969, "task_loss": 0.013280034065246582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931009533654448, "compression_loss": 0.0, "distillation_loss": 0.016060050576925278, "epoch": 4.72, "learning_rate": 1.2023268020725285e-05, "loss": 0.0427, "step": 4970, "task_loss": 0.2822893261909485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931069421208696, "compression_loss": 0.0, "distillation_loss": 0.16523435711860657, "epoch": 4.72, "learning_rate": 1.2014161818325687e-05, "loss": 0.1601, "step": 4971, "task_loss": 0.11406517028808594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931129274095764, "compression_loss": 0.0, "distillation_loss": 0.04404463618993759, "epoch": 4.72, "learning_rate": 1.2005057974866135e-05, "loss": 0.047, "step": 4972, "task_loss": 0.07401682436466217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931189092325689, "compression_loss": 0.0, "distillation_loss": 0.03614489734172821, "epoch": 4.72, "learning_rate": 1.1995956492000397e-05, "loss": 0.0331, "step": 4973, "task_loss": 0.005481433123350143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931248875908509, "compression_loss": 0.0, "distillation_loss": 0.048779651522636414, "epoch": 4.72, "learning_rate": 1.1986857371381788e-05, "loss": 0.0466, "step": 4974, "task_loss": 0.027400122955441475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931308624854259, "compression_loss": 0.0, "distillation_loss": 0.03478461876511574, "epoch": 4.72, "learning_rate": 1.19777606146632e-05, "loss": 0.0362, "step": 4975, "task_loss": 0.04934518039226532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931368339172977, "compression_loss": 0.0, "distillation_loss": 0.10953269898891449, "epoch": 4.73, "learning_rate": 1.1968666223497124e-05, "loss": 0.1125, "step": 4976, "task_loss": 0.1394570916891098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79314280188747, "compression_loss": 0.0, "distillation_loss": 0.039646346122026443, "epoch": 4.73, "learning_rate": 1.1959574199535586e-05, "loss": 0.0363, "step": 4977, "task_loss": 0.005907488986849785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931487663969464, "compression_loss": 0.0, "distillation_loss": 0.017676934599876404, "epoch": 4.73, "learning_rate": 1.1950484544430191e-05, "loss": 0.0241, "step": 4978, "task_loss": 0.08169616758823395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931547274467307, "compression_loss": 0.0, "distillation_loss": 0.035570040345191956, "epoch": 4.73, "learning_rate": 1.1941397259832133e-05, "loss": 0.0371, "step": 4979, "task_loss": 0.05074803903698921 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931606850378264, "compression_loss": 0.0, "distillation_loss": 0.058902185410261154, "epoch": 4.73, "learning_rate": 1.1932312347392154e-05, "loss": 0.0549, "step": 4980, "task_loss": 0.019008172675967216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931666391712374, "compression_loss": 0.0, "distillation_loss": 0.014614107087254524, "epoch": 4.73, "learning_rate": 1.1923229808760564e-05, "loss": 0.0136, "step": 4981, "task_loss": 0.0042775776237249374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931725898479672, "compression_loss": 0.0, "distillation_loss": 0.16022302210330963, "epoch": 4.73, "learning_rate": 1.1914149645587256e-05, "loss": 0.1527, "step": 4982, "task_loss": 0.08545684814453125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931785370690195, "compression_loss": 0.0, "distillation_loss": 0.03780351206660271, "epoch": 4.73, "learning_rate": 1.1905071859521697e-05, "loss": 0.0506, "step": 4983, "task_loss": 0.16610009968280792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931844808353982, "compression_loss": 0.0, "distillation_loss": 0.04032333940267563, "epoch": 4.73, "learning_rate": 1.1895996452212898e-05, "loss": 0.0395, "step": 4984, "task_loss": 0.032581619918346405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931904211481067, "compression_loss": 0.0, "distillation_loss": 0.03943778946995735, "epoch": 4.73, "learning_rate": 1.1886923425309445e-05, "loss": 0.0375, "step": 4985, "task_loss": 0.020389195531606674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7931963580081488, "compression_loss": 0.0, "distillation_loss": 0.0650925487279892, "epoch": 4.74, "learning_rate": 1.1877852780459518e-05, "loss": 0.0637, "step": 4986, "task_loss": 0.05133982002735138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932022914165282, "compression_loss": 0.0, "distillation_loss": 0.023483579978346825, "epoch": 4.74, "learning_rate": 1.1868784519310826e-05, "loss": 0.023, "step": 4987, "task_loss": 0.01817641593515873 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932082213742486, "compression_loss": 0.0, "distillation_loss": 0.038998499512672424, "epoch": 4.74, "learning_rate": 1.185971864351067e-05, "loss": 0.0598, "step": 4988, "task_loss": 0.24691206216812134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932141478823136, "compression_loss": 0.0, "distillation_loss": 0.08431239426136017, "epoch": 4.74, "learning_rate": 1.1850655154705886e-05, "loss": 0.078, "step": 4989, "task_loss": 0.020718101412057877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793220070941727, "compression_loss": 0.0, "distillation_loss": 0.01440352015197277, "epoch": 4.74, "learning_rate": 1.184159405454294e-05, "loss": 0.0134, "step": 4990, "task_loss": 0.004175456240773201 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932259905534923, "compression_loss": 0.0, "distillation_loss": 0.039678964763879776, "epoch": 4.74, "learning_rate": 1.1832535344667806e-05, "loss": 0.0511, "step": 4991, "task_loss": 0.1535629779100418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932319067186134, "compression_loss": 0.0, "distillation_loss": 0.187263622879982, "epoch": 4.74, "learning_rate": 1.1823479026726031e-05, "loss": 0.1935, "step": 4992, "task_loss": 0.24967440962791443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932378194380938, "compression_loss": 0.0, "distillation_loss": 0.013006833381950855, "epoch": 4.74, "learning_rate": 1.1814425102362761e-05, "loss": 0.0231, "step": 4993, "task_loss": 0.11390747874975204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932437287129374, "compression_loss": 0.0, "distillation_loss": 0.03500215336680412, "epoch": 4.74, "learning_rate": 1.1805373573222673e-05, "loss": 0.0377, "step": 4994, "task_loss": 0.06191583350300789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932496345441475, "compression_loss": 0.0, "distillation_loss": 0.036350950598716736, "epoch": 4.74, "learning_rate": 1.1796324440950021e-05, "loss": 0.0333, "step": 4995, "task_loss": 0.0061857327818870544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932555369327282, "compression_loss": 0.0, "distillation_loss": 0.053805820643901825, "epoch": 4.74, "learning_rate": 1.1787277707188616e-05, "loss": 0.0542, "step": 4996, "task_loss": 0.058030955493450165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932614358796829, "compression_loss": 0.0, "distillation_loss": 0.12426917254924774, "epoch": 4.75, "learning_rate": 1.1778233373581857e-05, "loss": 0.119, "step": 4997, "task_loss": 0.07191702723503113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932673313860155, "compression_loss": 0.0, "distillation_loss": 0.026698037981987, "epoch": 4.75, "learning_rate": 1.1769191441772672e-05, "loss": 0.0249, "step": 4998, "task_loss": 0.008508618921041489 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932732234527294, "compression_loss": 0.0, "distillation_loss": 0.02993636205792427, "epoch": 4.75, "learning_rate": 1.1760151913403583e-05, "loss": 0.0303, "step": 4999, "task_loss": 0.03406871110200882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932791120808286, "compression_loss": 0.0, "distillation_loss": 0.11071500182151794, "epoch": 4.75, "learning_rate": 1.1751114790116672e-05, "loss": 0.1129, "step": 5000, "task_loss": 0.13242202997207642 }, { "epoch": 4.75, "eval_accuracy": 0.8967889908256881, "eval_loss": 0.4209362268447876, "eval_runtime": 17.9495, "eval_samples_per_second": 48.581, "eval_steps_per_second": 6.073, "step": 5000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932849972713165, "compression_loss": 0.0, "distillation_loss": 0.022589676082134247, "epoch": 4.75, "learning_rate": 1.1742080073553565e-05, "loss": 0.0305, "step": 5001, "task_loss": 0.10146450996398926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793290879025197, "compression_loss": 0.0, "distillation_loss": 0.13711608946323395, "epoch": 4.75, "learning_rate": 1.1733047765355466e-05, "loss": 0.1323, "step": 5002, "task_loss": 0.08903352916240692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7932967573434737, "compression_loss": 0.0, "distillation_loss": 0.06641732901334763, "epoch": 4.75, "learning_rate": 1.1724017867163125e-05, "loss": 0.0615, "step": 5003, "task_loss": 0.01706705428659916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933026322271504, "compression_loss": 0.0, "distillation_loss": 0.10634138435125351, "epoch": 4.75, "learning_rate": 1.1714990380616884e-05, "loss": 0.1025, "step": 5004, "task_loss": 0.06803934276103973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933085036772304, "compression_loss": 0.0, "distillation_loss": 0.032394587993621826, "epoch": 4.75, "learning_rate": 1.1705965307356624e-05, "loss": 0.0461, "step": 5005, "task_loss": 0.16980022192001343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933143716947177, "compression_loss": 0.0, "distillation_loss": 0.03210689127445221, "epoch": 4.75, "learning_rate": 1.169694264902178e-05, "loss": 0.0516, "step": 5006, "task_loss": 0.22744594514369965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933202362806161, "compression_loss": 0.0, "distillation_loss": 0.05615242198109627, "epoch": 4.75, "learning_rate": 1.168792240725137e-05, "loss": 0.0532, "step": 5007, "task_loss": 0.0271142590790987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793326097435929, "compression_loss": 0.0, "distillation_loss": 0.182695671916008, "epoch": 4.76, "learning_rate": 1.1678904583683979e-05, "loss": 0.1831, "step": 5008, "task_loss": 0.18713723123073578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933319551616602, "compression_loss": 0.0, "distillation_loss": 0.07359650731086731, "epoch": 4.76, "learning_rate": 1.1669889179957725e-05, "loss": 0.0844, "step": 5009, "task_loss": 0.1816483587026596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933378094588133, "compression_loss": 0.0, "distillation_loss": 0.11011994630098343, "epoch": 4.76, "learning_rate": 1.1660876197710288e-05, "loss": 0.1121, "step": 5010, "task_loss": 0.13008421659469604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933436603283921, "compression_loss": 0.0, "distillation_loss": 0.02712424099445343, "epoch": 4.76, "learning_rate": 1.1651865638578944e-05, "loss": 0.0252, "step": 5011, "task_loss": 0.008356472477316856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933495077714002, "compression_loss": 0.0, "distillation_loss": 0.058398209512233734, "epoch": 4.76, "learning_rate": 1.1642857504200491e-05, "loss": 0.053, "step": 5012, "task_loss": 0.004138745367527008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933553517888414, "compression_loss": 0.0, "distillation_loss": 0.02487853169441223, "epoch": 4.76, "learning_rate": 1.1633851796211292e-05, "loss": 0.0327, "step": 5013, "task_loss": 0.10304756462574005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933611923817193, "compression_loss": 0.0, "distillation_loss": 0.09818457067012787, "epoch": 4.76, "learning_rate": 1.1624848516247295e-05, "loss": 0.0971, "step": 5014, "task_loss": 0.08704036474227905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933670295510376, "compression_loss": 0.0, "distillation_loss": 0.08643116801977158, "epoch": 4.76, "learning_rate": 1.1615847665943971e-05, "loss": 0.0919, "step": 5015, "task_loss": 0.14145499467849731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933728632977998, "compression_loss": 0.0, "distillation_loss": 0.027642257511615753, "epoch": 4.76, "learning_rate": 1.1606849246936389e-05, "loss": 0.0295, "step": 5016, "task_loss": 0.045945487916469574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933786936230098, "compression_loss": 0.0, "distillation_loss": 0.024577468633651733, "epoch": 4.76, "learning_rate": 1.1597853260859128e-05, "loss": 0.026, "step": 5017, "task_loss": 0.039100244641304016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933845205276713, "compression_loss": 0.0, "distillation_loss": 0.0331391841173172, "epoch": 4.77, "learning_rate": 1.1588859709346384e-05, "loss": 0.0595, "step": 5018, "task_loss": 0.29628539085388184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793390344012788, "compression_loss": 0.0, "distillation_loss": 0.03754423186182976, "epoch": 4.77, "learning_rate": 1.157986859403186e-05, "loss": 0.0382, "step": 5019, "task_loss": 0.04380743205547333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7933961640793633, "compression_loss": 0.0, "distillation_loss": 0.11798399686813354, "epoch": 4.77, "learning_rate": 1.1570879916548827e-05, "loss": 0.1147, "step": 5020, "task_loss": 0.0851517766714096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934019807284012, "compression_loss": 0.0, "distillation_loss": 0.034330565482378006, "epoch": 4.77, "learning_rate": 1.1561893678530141e-05, "loss": 0.0472, "step": 5021, "task_loss": 0.16320303082466125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934077939609051, "compression_loss": 0.0, "distillation_loss": 0.04643068462610245, "epoch": 4.77, "learning_rate": 1.155290988160819e-05, "loss": 0.0523, "step": 5022, "task_loss": 0.10536113381385803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793413603777879, "compression_loss": 0.0, "distillation_loss": 0.022104818373918533, "epoch": 4.77, "learning_rate": 1.154392852741491e-05, "loss": 0.036, "step": 5023, "task_loss": 0.1612529158592224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934194101803264, "compression_loss": 0.0, "distillation_loss": 0.09352529048919678, "epoch": 4.77, "learning_rate": 1.153494961758182e-05, "loss": 0.106, "step": 5024, "task_loss": 0.21867401897907257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934252131692509, "compression_loss": 0.0, "distillation_loss": 0.02358633652329445, "epoch": 4.77, "learning_rate": 1.1525973153739989e-05, "loss": 0.0252, "step": 5025, "task_loss": 0.03958255797624588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934310127456563, "compression_loss": 0.0, "distillation_loss": 0.04170753434300423, "epoch": 4.77, "learning_rate": 1.1516999137520023e-05, "loss": 0.0451, "step": 5026, "task_loss": 0.07600220292806625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934368089105464, "compression_loss": 0.0, "distillation_loss": 0.047912318259477615, "epoch": 4.77, "learning_rate": 1.1508027570552094e-05, "loss": 0.0595, "step": 5027, "task_loss": 0.16383595764636993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934426016649245, "compression_loss": 0.0, "distillation_loss": 0.06721566617488861, "epoch": 4.77, "learning_rate": 1.1499058454465941e-05, "loss": 0.0778, "step": 5028, "task_loss": 0.1731707900762558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934483910097948, "compression_loss": 0.0, "distillation_loss": 0.04140312969684601, "epoch": 4.78, "learning_rate": 1.1490091790890842e-05, "loss": 0.0483, "step": 5029, "task_loss": 0.11057362705469131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934541769461606, "compression_loss": 0.0, "distillation_loss": 0.04200680926442146, "epoch": 4.78, "learning_rate": 1.1481127581455626e-05, "loss": 0.0499, "step": 5030, "task_loss": 0.12135404348373413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934599594750257, "compression_loss": 0.0, "distillation_loss": 0.04865646734833717, "epoch": 4.78, "learning_rate": 1.1472165827788697e-05, "loss": 0.0534, "step": 5031, "task_loss": 0.09640198945999146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934657385973938, "compression_loss": 0.0, "distillation_loss": 0.08559805154800415, "epoch": 4.78, "learning_rate": 1.1463206531518001e-05, "loss": 0.0895, "step": 5032, "task_loss": 0.12415318191051483 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934715143142684, "compression_loss": 0.0, "distillation_loss": 0.04011882096529007, "epoch": 4.78, "learning_rate": 1.1454249694271021e-05, "loss": 0.0579, "step": 5033, "task_loss": 0.21819335222244263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934772866266535, "compression_loss": 0.0, "distillation_loss": 0.08635386824607849, "epoch": 4.78, "learning_rate": 1.144529531767482e-05, "loss": 0.09, "step": 5034, "task_loss": 0.12273044884204865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934830555355527, "compression_loss": 0.0, "distillation_loss": 0.02714727818965912, "epoch": 4.78, "learning_rate": 1.1436343403356017e-05, "loss": 0.0251, "step": 5035, "task_loss": 0.006460065022110939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934888210419694, "compression_loss": 0.0, "distillation_loss": 0.080211341381073, "epoch": 4.78, "learning_rate": 1.1427393952940754e-05, "loss": 0.0851, "step": 5036, "task_loss": 0.12943881750106812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7934945831469076, "compression_loss": 0.0, "distillation_loss": 0.04694737493991852, "epoch": 4.78, "learning_rate": 1.1418446968054741e-05, "loss": 0.0637, "step": 5037, "task_loss": 0.21490800380706787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935003418513709, "compression_loss": 0.0, "distillation_loss": 0.1910262107849121, "epoch": 4.78, "learning_rate": 1.1409502450323254e-05, "loss": 0.1973, "step": 5038, "task_loss": 0.25344181060791016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935060971563629, "compression_loss": 0.0, "distillation_loss": 0.08850134164094925, "epoch": 4.79, "learning_rate": 1.1400560401371097e-05, "loss": 0.0971, "step": 5039, "task_loss": 0.17405250668525696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935118490628873, "compression_loss": 0.0, "distillation_loss": 0.023128552362322807, "epoch": 4.79, "learning_rate": 1.1391620822822629e-05, "loss": 0.0234, "step": 5040, "task_loss": 0.025910664349794388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935175975719478, "compression_loss": 0.0, "distillation_loss": 0.02330312877893448, "epoch": 4.79, "learning_rate": 1.1382683716301781e-05, "loss": 0.0262, "step": 5041, "task_loss": 0.0519491508603096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935233426845482, "compression_loss": 0.0, "distillation_loss": 0.029029665514826775, "epoch": 4.79, "learning_rate": 1.1373749083432025e-05, "loss": 0.0293, "step": 5042, "task_loss": 0.03185213357210159 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793529084401692, "compression_loss": 0.0, "distillation_loss": 0.030532198026776314, "epoch": 4.79, "learning_rate": 1.1364816925836372e-05, "loss": 0.0405, "step": 5043, "task_loss": 0.13049139082431793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793534822724383, "compression_loss": 0.0, "distillation_loss": 0.030790645629167557, "epoch": 4.79, "learning_rate": 1.1355887245137383e-05, "loss": 0.0355, "step": 5044, "task_loss": 0.07748115062713623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935405576536249, "compression_loss": 0.0, "distillation_loss": 0.10416917502880096, "epoch": 4.79, "learning_rate": 1.1346960042957197e-05, "loss": 0.1008, "step": 5045, "task_loss": 0.07013058662414551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935462891904211, "compression_loss": 0.0, "distillation_loss": 0.018903907388448715, "epoch": 4.79, "learning_rate": 1.1338035320917473e-05, "loss": 0.0232, "step": 5046, "task_loss": 0.06144241988658905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935520173357757, "compression_loss": 0.0, "distillation_loss": 0.012658301740884781, "epoch": 4.79, "learning_rate": 1.1329113080639419e-05, "loss": 0.029, "step": 5047, "task_loss": 0.17617405951023102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935577420906922, "compression_loss": 0.0, "distillation_loss": 0.025603320449590683, "epoch": 4.79, "learning_rate": 1.1320193323743824e-05, "loss": 0.026, "step": 5048, "task_loss": 0.02996073290705681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935634634561742, "compression_loss": 0.0, "distillation_loss": 0.04193894937634468, "epoch": 4.79, "learning_rate": 1.1311276051850994e-05, "loss": 0.0599, "step": 5049, "task_loss": 0.22172468900680542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935691814332255, "compression_loss": 0.0, "distillation_loss": 0.089395672082901, "epoch": 4.8, "learning_rate": 1.1302361266580786e-05, "loss": 0.0867, "step": 5050, "task_loss": 0.0627622902393341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935748960228497, "compression_loss": 0.0, "distillation_loss": 0.03509131073951721, "epoch": 4.8, "learning_rate": 1.1293448969552623e-05, "loss": 0.0442, "step": 5051, "task_loss": 0.12643598020076752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935806072260505, "compression_loss": 0.0, "distillation_loss": 0.035416148602962494, "epoch": 4.8, "learning_rate": 1.1284539162385474e-05, "loss": 0.0432, "step": 5052, "task_loss": 0.1130717545747757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935863150438317, "compression_loss": 0.0, "distillation_loss": 0.038104936480522156, "epoch": 4.8, "learning_rate": 1.127563184669784e-05, "loss": 0.0569, "step": 5053, "task_loss": 0.226011723279953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935920194771968, "compression_loss": 0.0, "distillation_loss": 0.07715682685375214, "epoch": 4.8, "learning_rate": 1.1266727024107771e-05, "loss": 0.0907, "step": 5054, "task_loss": 0.2121979296207428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7935977205271496, "compression_loss": 0.0, "distillation_loss": 0.02819431945681572, "epoch": 4.8, "learning_rate": 1.1257824696232888e-05, "loss": 0.0261, "step": 5055, "task_loss": 0.007308483123779297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936034181946937, "compression_loss": 0.0, "distillation_loss": 0.020847158506512642, "epoch": 4.8, "learning_rate": 1.124892486469033e-05, "loss": 0.0193, "step": 5056, "task_loss": 0.005755210295319557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936091124808329, "compression_loss": 0.0, "distillation_loss": 0.03235051780939102, "epoch": 4.8, "learning_rate": 1.1240027531096786e-05, "loss": 0.0299, "step": 5057, "task_loss": 0.007758015766739845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936148033865708, "compression_loss": 0.0, "distillation_loss": 0.19748008251190186, "epoch": 4.8, "learning_rate": 1.1231132697068523e-05, "loss": 0.1925, "step": 5058, "task_loss": 0.14814594388008118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793620490912911, "compression_loss": 0.0, "distillation_loss": 0.013191369362175465, "epoch": 4.8, "learning_rate": 1.1222240364221303e-05, "loss": 0.0211, "step": 5059, "task_loss": 0.09272238612174988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936261750608573, "compression_loss": 0.0, "distillation_loss": 0.05488280951976776, "epoch": 4.81, "learning_rate": 1.1213350534170488e-05, "loss": 0.0512, "step": 5060, "task_loss": 0.018499650061130524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936318558314134, "compression_loss": 0.0, "distillation_loss": 0.025873934850096703, "epoch": 4.81, "learning_rate": 1.1204463208530936e-05, "loss": 0.0244, "step": 5061, "task_loss": 0.01143595576286316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793637533225583, "compression_loss": 0.0, "distillation_loss": 0.05775732174515724, "epoch": 4.81, "learning_rate": 1.1195578388917092e-05, "loss": 0.0579, "step": 5062, "task_loss": 0.05883469432592392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936432072443695, "compression_loss": 0.0, "distillation_loss": 0.058215852826833725, "epoch": 4.81, "learning_rate": 1.1186696076942916e-05, "loss": 0.0542, "step": 5063, "task_loss": 0.018394893035292625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793648877888777, "compression_loss": 0.0, "distillation_loss": 0.07067988067865372, "epoch": 4.81, "learning_rate": 1.1177816274221911e-05, "loss": 0.0808, "step": 5064, "task_loss": 0.17217287421226501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936545451598089, "compression_loss": 0.0, "distillation_loss": 0.03770400956273079, "epoch": 4.81, "learning_rate": 1.116893898236716e-05, "loss": 0.0355, "step": 5065, "task_loss": 0.015920959413051605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793660209058469, "compression_loss": 0.0, "distillation_loss": 0.049285177141427994, "epoch": 4.81, "learning_rate": 1.1160064202991254e-05, "loss": 0.0594, "step": 5066, "task_loss": 0.14998885989189148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793665869585761, "compression_loss": 0.0, "distillation_loss": 0.03805050998926163, "epoch": 4.81, "learning_rate": 1.115119193770633e-05, "loss": 0.0408, "step": 5067, "task_loss": 0.06544838100671768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936715267426885, "compression_loss": 0.0, "distillation_loss": 0.021483074873685837, "epoch": 4.81, "learning_rate": 1.1142322188124088e-05, "loss": 0.0442, "step": 5068, "task_loss": 0.24901482462882996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936771805302552, "compression_loss": 0.0, "distillation_loss": 0.11341410130262375, "epoch": 4.81, "learning_rate": 1.1133454955855768e-05, "loss": 0.1099, "step": 5069, "task_loss": 0.0778903216123581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936828309494648, "compression_loss": 0.0, "distillation_loss": 0.1032809242606163, "epoch": 4.81, "learning_rate": 1.1124590242512137e-05, "loss": 0.0993, "step": 5070, "task_loss": 0.06375472992658615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793688478001321, "compression_loss": 0.0, "distillation_loss": 0.04365667700767517, "epoch": 4.82, "learning_rate": 1.1115728049703503e-05, "loss": 0.0504, "step": 5071, "task_loss": 0.11084698140621185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936941216868274, "compression_loss": 0.0, "distillation_loss": 0.015161161310970783, "epoch": 4.82, "learning_rate": 1.1106868379039747e-05, "loss": 0.0141, "step": 5072, "task_loss": 0.004572039470076561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7936997620069878, "compression_loss": 0.0, "distillation_loss": 0.038352642208337784, "epoch": 4.82, "learning_rate": 1.109801123213026e-05, "loss": 0.0374, "step": 5073, "task_loss": 0.02898401767015457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937053989628058, "compression_loss": 0.0, "distillation_loss": 0.07865383476018906, "epoch": 4.82, "learning_rate": 1.1089156610583985e-05, "loss": 0.0769, "step": 5074, "task_loss": 0.061387475579977036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937110325552851, "compression_loss": 0.0, "distillation_loss": 0.024030856788158417, "epoch": 4.82, "learning_rate": 1.1080304516009399e-05, "loss": 0.024, "step": 5075, "task_loss": 0.023738959804177284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937166627854294, "compression_loss": 0.0, "distillation_loss": 0.0827215313911438, "epoch": 4.82, "learning_rate": 1.1071454950014533e-05, "loss": 0.1135, "step": 5076, "task_loss": 0.3905654847621918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937222896542424, "compression_loss": 0.0, "distillation_loss": 0.06419490277767181, "epoch": 4.82, "learning_rate": 1.1062607914206965e-05, "loss": 0.071, "step": 5077, "task_loss": 0.13255758583545685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937279131627277, "compression_loss": 0.0, "distillation_loss": 0.044310204684734344, "epoch": 4.82, "learning_rate": 1.1053763410193788e-05, "loss": 0.057, "step": 5078, "task_loss": 0.17076875269412994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793733533311889, "compression_loss": 0.0, "distillation_loss": 0.05891291797161102, "epoch": 4.82, "learning_rate": 1.104492143958166e-05, "loss": 0.0659, "step": 5079, "task_loss": 0.1289033591747284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937391501027301, "compression_loss": 0.0, "distillation_loss": 0.05876559019088745, "epoch": 4.82, "learning_rate": 1.1036082003976759e-05, "loss": 0.0594, "step": 5080, "task_loss": 0.06525548547506332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937447635362546, "compression_loss": 0.0, "distillation_loss": 0.1539030373096466, "epoch": 4.83, "learning_rate": 1.1027245104984816e-05, "loss": 0.1464, "step": 5081, "task_loss": 0.07844631373882294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937503736134661, "compression_loss": 0.0, "distillation_loss": 0.12115428596735, "epoch": 4.83, "learning_rate": 1.1018410744211081e-05, "loss": 0.1157, "step": 5082, "task_loss": 0.06699676811695099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937559803353684, "compression_loss": 0.0, "distillation_loss": 0.03977685421705246, "epoch": 4.83, "learning_rate": 1.100957892326038e-05, "loss": 0.0385, "step": 5083, "task_loss": 0.027261190116405487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937615837029652, "compression_loss": 0.0, "distillation_loss": 0.1250099539756775, "epoch": 4.83, "learning_rate": 1.100074964373705e-05, "loss": 0.1183, "step": 5084, "task_loss": 0.058270517736673355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937671837172601, "compression_loss": 0.0, "distillation_loss": 0.015824340283870697, "epoch": 4.83, "learning_rate": 1.0991922907244944e-05, "loss": 0.0147, "step": 5085, "task_loss": 0.004903359338641167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937727803792568, "compression_loss": 0.0, "distillation_loss": 0.01482314057648182, "epoch": 4.83, "learning_rate": 1.0983098715387526e-05, "loss": 0.0138, "step": 5086, "task_loss": 0.004439596086740494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793778373689959, "compression_loss": 0.0, "distillation_loss": 0.038871392607688904, "epoch": 4.83, "learning_rate": 1.0974277069767735e-05, "loss": 0.044, "step": 5087, "task_loss": 0.09021726995706558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937839636503703, "compression_loss": 0.0, "distillation_loss": 0.0341789647936821, "epoch": 4.83, "learning_rate": 1.096545797198806e-05, "loss": 0.045, "step": 5088, "task_loss": 0.14275771379470825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937895502614946, "compression_loss": 0.0, "distillation_loss": 0.02304195612668991, "epoch": 4.83, "learning_rate": 1.095664142365053e-05, "loss": 0.0342, "step": 5089, "task_loss": 0.13422459363937378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7937951335243353, "compression_loss": 0.0, "distillation_loss": 0.027511442080140114, "epoch": 4.83, "learning_rate": 1.0947827426356725e-05, "loss": 0.0257, "step": 5090, "task_loss": 0.009677274152636528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938007134398962, "compression_loss": 0.0, "distillation_loss": 0.039502017199993134, "epoch": 4.83, "learning_rate": 1.0939015981707746e-05, "loss": 0.054, "step": 5091, "task_loss": 0.18460404872894287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938062900091811, "compression_loss": 0.0, "distillation_loss": 0.03704231232404709, "epoch": 4.84, "learning_rate": 1.093020709130422e-05, "loss": 0.0372, "step": 5092, "task_loss": 0.038229990750551224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938118632331935, "compression_loss": 0.0, "distillation_loss": 0.05441391468048096, "epoch": 4.84, "learning_rate": 1.0921400756746338e-05, "loss": 0.0582, "step": 5093, "task_loss": 0.09189236164093018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938174331129373, "compression_loss": 0.0, "distillation_loss": 0.1751764416694641, "epoch": 4.84, "learning_rate": 1.0912596979633824e-05, "loss": 0.173, "step": 5094, "task_loss": 0.15349790453910828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938229996494159, "compression_loss": 0.0, "distillation_loss": 0.03634584695100784, "epoch": 4.84, "learning_rate": 1.0903795761565914e-05, "loss": 0.0338, "step": 5095, "task_loss": 0.010769536718726158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938285628436332, "compression_loss": 0.0, "distillation_loss": 0.06358032673597336, "epoch": 4.84, "learning_rate": 1.0894997104141378e-05, "loss": 0.0612, "step": 5096, "task_loss": 0.03997098281979561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938341226965929, "compression_loss": 0.0, "distillation_loss": 0.06305442750453949, "epoch": 4.84, "learning_rate": 1.088620100895856e-05, "loss": 0.0676, "step": 5097, "task_loss": 0.10872484743595123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938396792092985, "compression_loss": 0.0, "distillation_loss": 0.02636984921991825, "epoch": 4.84, "learning_rate": 1.0877407477615304e-05, "loss": 0.025, "step": 5098, "task_loss": 0.013119250535964966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938452323827537, "compression_loss": 0.0, "distillation_loss": 0.04463554173707962, "epoch": 4.84, "learning_rate": 1.0868616511708982e-05, "loss": 0.0485, "step": 5099, "task_loss": 0.0837230458855629 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938507822179625, "compression_loss": 0.0, "distillation_loss": 0.045190710574388504, "epoch": 4.84, "learning_rate": 1.085982811283654e-05, "loss": 0.0637, "step": 5100, "task_loss": 0.23051756620407104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938563287159282, "compression_loss": 0.0, "distillation_loss": 0.05444833263754845, "epoch": 4.84, "learning_rate": 1.085104228259442e-05, "loss": 0.0613, "step": 5101, "task_loss": 0.12304200232028961 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938618718776546, "compression_loss": 0.0, "distillation_loss": 0.03785646706819534, "epoch": 4.85, "learning_rate": 1.0842259022578602e-05, "loss": 0.0397, "step": 5102, "task_loss": 0.05668614059686661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938674117041453, "compression_loss": 0.0, "distillation_loss": 0.04682941734790802, "epoch": 4.85, "learning_rate": 1.0833478334384617e-05, "loss": 0.0546, "step": 5103, "task_loss": 0.12421214580535889 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938729481964042, "compression_loss": 0.0, "distillation_loss": 0.09900999814271927, "epoch": 4.85, "learning_rate": 1.082470021960753e-05, "loss": 0.0984, "step": 5104, "task_loss": 0.09260520339012146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938784813554349, "compression_loss": 0.0, "distillation_loss": 0.021003127098083496, "epoch": 4.85, "learning_rate": 1.0815924679841916e-05, "loss": 0.0387, "step": 5105, "task_loss": 0.19845835864543915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938840111822411, "compression_loss": 0.0, "distillation_loss": 0.038200899958610535, "epoch": 4.85, "learning_rate": 1.0807151716681885e-05, "loss": 0.0421, "step": 5106, "task_loss": 0.0776185691356659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938895376778262, "compression_loss": 0.0, "distillation_loss": 0.016974829137325287, "epoch": 4.85, "learning_rate": 1.0798381331721109e-05, "loss": 0.03, "step": 5107, "task_loss": 0.14757771790027618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7938950608431943, "compression_loss": 0.0, "distillation_loss": 0.038715705275535583, "epoch": 4.85, "learning_rate": 1.0789613526552758e-05, "loss": 0.0371, "step": 5108, "task_loss": 0.02297482080757618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939005806793489, "compression_loss": 0.0, "distillation_loss": 0.029004612937569618, "epoch": 4.85, "learning_rate": 1.0780848302769542e-05, "loss": 0.036, "step": 5109, "task_loss": 0.09945765137672424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939060971872937, "compression_loss": 0.0, "distillation_loss": 0.018345588818192482, "epoch": 4.85, "learning_rate": 1.0772085661963708e-05, "loss": 0.0224, "step": 5110, "task_loss": 0.05867563933134079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939116103680323, "compression_loss": 0.0, "distillation_loss": 0.026313165202736855, "epoch": 4.85, "learning_rate": 1.0763325605727048e-05, "loss": 0.024, "step": 5111, "task_loss": 0.003129279240965843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939171202225684, "compression_loss": 0.0, "distillation_loss": 0.0785897821187973, "epoch": 4.85, "learning_rate": 1.0754568135650856e-05, "loss": 0.0924, "step": 5112, "task_loss": 0.2167954295873642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939226267519057, "compression_loss": 0.0, "distillation_loss": 0.028556518256664276, "epoch": 4.86, "learning_rate": 1.0745813253325957e-05, "loss": 0.0266, "step": 5113, "task_loss": 0.0085418950766325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939281299570481, "compression_loss": 0.0, "distillation_loss": 0.020152829587459564, "epoch": 4.86, "learning_rate": 1.073706096034274e-05, "loss": 0.0343, "step": 5114, "task_loss": 0.161162868142128 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793933629838999, "compression_loss": 0.0, "distillation_loss": 0.03302818909287453, "epoch": 4.86, "learning_rate": 1.072831125829109e-05, "loss": 0.0397, "step": 5115, "task_loss": 0.09927168488502502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939391263987621, "compression_loss": 0.0, "distillation_loss": 0.027194611728191376, "epoch": 4.86, "learning_rate": 1.0719564148760427e-05, "loss": 0.0256, "step": 5116, "task_loss": 0.011119823902845383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939446196373412, "compression_loss": 0.0, "distillation_loss": 0.03556310385465622, "epoch": 4.86, "learning_rate": 1.0710819633339719e-05, "loss": 0.0339, "step": 5117, "task_loss": 0.018816489726305008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939501095557399, "compression_loss": 0.0, "distillation_loss": 0.3605664372444153, "epoch": 4.86, "learning_rate": 1.0702077713617442e-05, "loss": 0.3543, "step": 5118, "task_loss": 0.2975212633609772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939555961549619, "compression_loss": 0.0, "distillation_loss": 0.02403501234948635, "epoch": 4.86, "learning_rate": 1.0693338391181598e-05, "loss": 0.0224, "step": 5119, "task_loss": 0.007670925930142403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793961079436011, "compression_loss": 0.0, "distillation_loss": 0.13072296977043152, "epoch": 4.86, "learning_rate": 1.0684601667619736e-05, "loss": 0.1322, "step": 5120, "task_loss": 0.1455729752779007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939665593998908, "compression_loss": 0.0, "distillation_loss": 0.03021019697189331, "epoch": 4.86, "learning_rate": 1.0675867544518935e-05, "loss": 0.0285, "step": 5121, "task_loss": 0.01261189952492714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939720360476049, "compression_loss": 0.0, "distillation_loss": 0.09756823629140854, "epoch": 4.86, "learning_rate": 1.0667136023465782e-05, "loss": 0.0941, "step": 5122, "task_loss": 0.06310638785362244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.793977509380157, "compression_loss": 0.0, "distillation_loss": 0.020023830235004425, "epoch": 4.87, "learning_rate": 1.0658407106046389e-05, "loss": 0.0209, "step": 5123, "task_loss": 0.029044657945632935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939829793985509, "compression_loss": 0.0, "distillation_loss": 0.06327167898416519, "epoch": 4.87, "learning_rate": 1.0649680793846423e-05, "loss": 0.0785, "step": 5124, "task_loss": 0.21557585895061493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939884461037902, "compression_loss": 0.0, "distillation_loss": 0.0167409535497427, "epoch": 4.87, "learning_rate": 1.0640957088451054e-05, "loss": 0.0207, "step": 5125, "task_loss": 0.056279633194208145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939939094968785, "compression_loss": 0.0, "distillation_loss": 0.03627139329910278, "epoch": 4.87, "learning_rate": 1.0632235991444972e-05, "loss": 0.0342, "step": 5126, "task_loss": 0.01587931625545025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7939993695788197, "compression_loss": 0.0, "distillation_loss": 0.023063641041517258, "epoch": 4.87, "learning_rate": 1.0623517504412429e-05, "loss": 0.0296, "step": 5127, "task_loss": 0.08890461921691895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940048263506173, "compression_loss": 0.0, "distillation_loss": 0.021907242015004158, "epoch": 4.87, "learning_rate": 1.061480162893716e-05, "loss": 0.0297, "step": 5128, "task_loss": 0.09995594620704651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940102798132751, "compression_loss": 0.0, "distillation_loss": 0.05496376007795334, "epoch": 4.87, "learning_rate": 1.0606088366602462e-05, "loss": 0.0606, "step": 5129, "task_loss": 0.11096987873315811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940157299677967, "compression_loss": 0.0, "distillation_loss": 0.04084121435880661, "epoch": 4.87, "learning_rate": 1.0597377718991127e-05, "loss": 0.0394, "step": 5130, "task_loss": 0.02599833346903324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940211768151857, "compression_loss": 0.0, "distillation_loss": 0.08682038635015488, "epoch": 4.87, "learning_rate": 1.0588669687685502e-05, "loss": 0.0918, "step": 5131, "task_loss": 0.13667771220207214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794026620356446, "compression_loss": 0.0, "distillation_loss": 0.056324686855077744, "epoch": 4.87, "learning_rate": 1.0579964274267434e-05, "loss": 0.0604, "step": 5132, "task_loss": 0.0968530923128128 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940320605925811, "compression_loss": 0.0, "distillation_loss": 0.022295914590358734, "epoch": 4.87, "learning_rate": 1.0571261480318295e-05, "loss": 0.0266, "step": 5133, "task_loss": 0.0648619681596756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940374975245948, "compression_loss": 0.0, "distillation_loss": 0.019165407866239548, "epoch": 4.88, "learning_rate": 1.0562561307419005e-05, "loss": 0.0193, "step": 5134, "task_loss": 0.020621633157134056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940429311534907, "compression_loss": 0.0, "distillation_loss": 0.10466399788856506, "epoch": 4.88, "learning_rate": 1.0553863757149985e-05, "loss": 0.1121, "step": 5135, "task_loss": 0.17872130870819092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940483614802726, "compression_loss": 0.0, "distillation_loss": 0.0792841836810112, "epoch": 4.88, "learning_rate": 1.0545168831091178e-05, "loss": 0.0759, "step": 5136, "task_loss": 0.04564621299505234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794053788505944, "compression_loss": 0.0, "distillation_loss": 0.038627780973911285, "epoch": 4.88, "learning_rate": 1.0536476530822071e-05, "loss": 0.0547, "step": 5137, "task_loss": 0.19965079426765442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940592122315087, "compression_loss": 0.0, "distillation_loss": 0.030025865882635117, "epoch": 4.88, "learning_rate": 1.0527786857921665e-05, "loss": 0.0306, "step": 5138, "task_loss": 0.03600824251770973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940646326579703, "compression_loss": 0.0, "distillation_loss": 0.054387472569942474, "epoch": 4.88, "learning_rate": 1.0519099813968475e-05, "loss": 0.0584, "step": 5139, "task_loss": 0.0948576033115387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940700497863327, "compression_loss": 0.0, "distillation_loss": 0.04773625731468201, "epoch": 4.88, "learning_rate": 1.0510415400540538e-05, "loss": 0.0494, "step": 5140, "task_loss": 0.06462717801332474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940754636175993, "compression_loss": 0.0, "distillation_loss": 0.06973198801279068, "epoch": 4.88, "learning_rate": 1.0501733619215432e-05, "loss": 0.0686, "step": 5141, "task_loss": 0.05870828405022621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940808741527738, "compression_loss": 0.0, "distillation_loss": 0.08635716885328293, "epoch": 4.88, "learning_rate": 1.049305447157024e-05, "loss": 0.1117, "step": 5142, "task_loss": 0.34026485681533813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940862813928602, "compression_loss": 0.0, "distillation_loss": 0.019729439169168472, "epoch": 4.88, "learning_rate": 1.048437795918156e-05, "loss": 0.0181, "step": 5143, "task_loss": 0.0036925822496414185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940916853388619, "compression_loss": 0.0, "distillation_loss": 0.03241331875324249, "epoch": 4.89, "learning_rate": 1.0475704083625543e-05, "loss": 0.0368, "step": 5144, "task_loss": 0.07607822120189667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7940970859917826, "compression_loss": 0.0, "distillation_loss": 0.033501721918582916, "epoch": 4.89, "learning_rate": 1.0467032846477818e-05, "loss": 0.0338, "step": 5145, "task_loss": 0.03676898777484894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941024833526261, "compression_loss": 0.0, "distillation_loss": 0.04217281937599182, "epoch": 4.89, "learning_rate": 1.0458364249313577e-05, "loss": 0.0449, "step": 5146, "task_loss": 0.06914474070072174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941078774223961, "compression_loss": 0.0, "distillation_loss": 0.06948964297771454, "epoch": 4.89, "learning_rate": 1.0449698293707494e-05, "loss": 0.0717, "step": 5147, "task_loss": 0.09196187555789948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794113268202096, "compression_loss": 0.0, "distillation_loss": 0.16506507992744446, "epoch": 4.89, "learning_rate": 1.0441034981233802e-05, "loss": 0.1604, "step": 5148, "task_loss": 0.11810018122196198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941186556927298, "compression_loss": 0.0, "distillation_loss": 0.05315421149134636, "epoch": 4.89, "learning_rate": 1.043237431346622e-05, "loss": 0.0599, "step": 5149, "task_loss": 0.12041810899972916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794124039895301, "compression_loss": 0.0, "distillation_loss": 0.1707717776298523, "epoch": 4.89, "learning_rate": 1.0423716291977995e-05, "loss": 0.1788, "step": 5150, "task_loss": 0.2513020634651184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941294208108134, "compression_loss": 0.0, "distillation_loss": 0.08805613964796066, "epoch": 4.89, "learning_rate": 1.0415060918341915e-05, "loss": 0.0987, "step": 5151, "task_loss": 0.1942642778158188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941347984402707, "compression_loss": 0.0, "distillation_loss": 0.017222080379724503, "epoch": 4.89, "learning_rate": 1.0406408194130259e-05, "loss": 0.026, "step": 5152, "task_loss": 0.10548414289951324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941401727846764, "compression_loss": 0.0, "distillation_loss": 0.15333212912082672, "epoch": 4.89, "learning_rate": 1.0397758120914838e-05, "loss": 0.1523, "step": 5153, "task_loss": 0.1433287113904953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941455438450343, "compression_loss": 0.0, "distillation_loss": 0.036811452358961105, "epoch": 4.89, "learning_rate": 1.0389110700266965e-05, "loss": 0.0339, "step": 5154, "task_loss": 0.007728196680545807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941509116223481, "compression_loss": 0.0, "distillation_loss": 0.02540665678679943, "epoch": 4.9, "learning_rate": 1.0380465933757516e-05, "loss": 0.0312, "step": 5155, "task_loss": 0.08360700309276581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941562761176214, "compression_loss": 0.0, "distillation_loss": 0.036480970680713654, "epoch": 4.9, "learning_rate": 1.037182382295684e-05, "loss": 0.0338, "step": 5156, "task_loss": 0.009625611826777458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794161637331858, "compression_loss": 0.0, "distillation_loss": 0.16506238281726837, "epoch": 4.9, "learning_rate": 1.0363184369434803e-05, "loss": 0.1557, "step": 5157, "task_loss": 0.07142955809831619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941669952660615, "compression_loss": 0.0, "distillation_loss": 0.13784891366958618, "epoch": 4.9, "learning_rate": 1.0354547574760828e-05, "loss": 0.1407, "step": 5158, "task_loss": 0.16680915653705597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941723499212356, "compression_loss": 0.0, "distillation_loss": 0.05448282137513161, "epoch": 4.9, "learning_rate": 1.034591344050382e-05, "loss": 0.0615, "step": 5159, "task_loss": 0.125069722533226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941777012983839, "compression_loss": 0.0, "distillation_loss": 0.0830635204911232, "epoch": 4.9, "learning_rate": 1.0337281968232213e-05, "loss": 0.0912, "step": 5160, "task_loss": 0.1648905873298645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941830493985104, "compression_loss": 0.0, "distillation_loss": 0.09780248999595642, "epoch": 4.9, "learning_rate": 1.032865315951394e-05, "loss": 0.1198, "step": 5161, "task_loss": 0.3177647292613983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941883942226183, "compression_loss": 0.0, "distillation_loss": 0.030118845403194427, "epoch": 4.9, "learning_rate": 1.0320027015916481e-05, "loss": 0.0312, "step": 5162, "task_loss": 0.04085350036621094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7941937357717117, "compression_loss": 0.0, "distillation_loss": 0.019481580704450607, "epoch": 4.9, "learning_rate": 1.0311403539006823e-05, "loss": 0.0233, "step": 5163, "task_loss": 0.05726565048098564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794199074046794, "compression_loss": 0.0, "distillation_loss": 0.08498305082321167, "epoch": 4.9, "learning_rate": 1.0302782730351443e-05, "loss": 0.0813, "step": 5164, "task_loss": 0.047981880605220795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794204409048869, "compression_loss": 0.0, "distillation_loss": 0.09883970767259598, "epoch": 4.91, "learning_rate": 1.0294164591516372e-05, "loss": 0.1042, "step": 5165, "task_loss": 0.15241563320159912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942097407789405, "compression_loss": 0.0, "distillation_loss": 0.13842526078224182, "epoch": 4.91, "learning_rate": 1.0285549124067126e-05, "loss": 0.1384, "step": 5166, "task_loss": 0.13855035603046417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794215069238012, "compression_loss": 0.0, "distillation_loss": 0.2038910686969757, "epoch": 4.91, "learning_rate": 1.0276936329568748e-05, "loss": 0.2098, "step": 5167, "task_loss": 0.26268768310546875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942203944270871, "compression_loss": 0.0, "distillation_loss": 0.09594913572072983, "epoch": 4.91, "learning_rate": 1.0268326209585784e-05, "loss": 0.0974, "step": 5168, "task_loss": 0.11042402684688568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942257163471699, "compression_loss": 0.0, "distillation_loss": 0.09930403530597687, "epoch": 4.91, "learning_rate": 1.0259718765682321e-05, "loss": 0.1025, "step": 5169, "task_loss": 0.13175342977046967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942310349992636, "compression_loss": 0.0, "distillation_loss": 0.19986680150032043, "epoch": 4.91, "learning_rate": 1.0251113999421935e-05, "loss": 0.1976, "step": 5170, "task_loss": 0.1776677966117859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942363503843721, "compression_loss": 0.0, "distillation_loss": 0.07311776280403137, "epoch": 4.91, "learning_rate": 1.0242511912367706e-05, "loss": 0.0678, "step": 5171, "task_loss": 0.019507795572280884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942416625034991, "compression_loss": 0.0, "distillation_loss": 0.041407689452171326, "epoch": 4.91, "learning_rate": 1.0233912506082277e-05, "loss": 0.0407, "step": 5172, "task_loss": 0.03435212001204491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942469713576482, "compression_loss": 0.0, "distillation_loss": 0.03155471384525299, "epoch": 4.91, "learning_rate": 1.0225315782127756e-05, "loss": 0.0368, "step": 5173, "task_loss": 0.0840642973780632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942522769478233, "compression_loss": 0.0, "distillation_loss": 0.059278104454278946, "epoch": 4.91, "learning_rate": 1.0216721742065777e-05, "loss": 0.0573, "step": 5174, "task_loss": 0.03901209309697151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942575792750277, "compression_loss": 0.0, "distillation_loss": 0.09744984656572342, "epoch": 4.91, "learning_rate": 1.0208130387457485e-05, "loss": 0.1025, "step": 5175, "task_loss": 0.14773344993591309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942628783402654, "compression_loss": 0.0, "distillation_loss": 0.04598250985145569, "epoch": 4.92, "learning_rate": 1.0199541719863554e-05, "loss": 0.0494, "step": 5176, "task_loss": 0.08029758185148239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79426817414454, "compression_loss": 0.0, "distillation_loss": 0.06423640996217728, "epoch": 4.92, "learning_rate": 1.0190955740844147e-05, "loss": 0.0723, "step": 5177, "task_loss": 0.1453644037246704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942734666888551, "compression_loss": 0.0, "distillation_loss": 0.07094942778348923, "epoch": 4.92, "learning_rate": 1.0182372451958943e-05, "loss": 0.076, "step": 5178, "task_loss": 0.12134627997875214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942787559742146, "compression_loss": 0.0, "distillation_loss": 0.11129570007324219, "epoch": 4.92, "learning_rate": 1.0173791854767155e-05, "loss": 0.1065, "step": 5179, "task_loss": 0.06368857622146606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942840420016218, "compression_loss": 0.0, "distillation_loss": 0.04373692348599434, "epoch": 4.92, "learning_rate": 1.0165213950827473e-05, "loss": 0.0422, "step": 5180, "task_loss": 0.028108179569244385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942893247720808, "compression_loss": 0.0, "distillation_loss": 0.06466871500015259, "epoch": 4.92, "learning_rate": 1.015663874169813e-05, "loss": 0.0863, "step": 5181, "task_loss": 0.28089067339897156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794294604286595, "compression_loss": 0.0, "distillation_loss": 0.04316772520542145, "epoch": 4.92, "learning_rate": 1.0148066228936836e-05, "loss": 0.0488, "step": 5182, "task_loss": 0.09936368465423584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7942998805461682, "compression_loss": 0.0, "distillation_loss": 0.05181555822491646, "epoch": 4.92, "learning_rate": 1.013949641410085e-05, "loss": 0.0584, "step": 5183, "task_loss": 0.117317795753479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794305153551804, "compression_loss": 0.0, "distillation_loss": 0.020150721073150635, "epoch": 4.92, "learning_rate": 1.0130929298746909e-05, "loss": 0.0317, "step": 5184, "task_loss": 0.1358027458190918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943104233045062, "compression_loss": 0.0, "distillation_loss": 0.06334016472101212, "epoch": 4.92, "learning_rate": 1.0122364884431259e-05, "loss": 0.0633, "step": 5185, "task_loss": 0.062485143542289734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943156898052783, "compression_loss": 0.0, "distillation_loss": 0.05838319659233093, "epoch": 4.92, "learning_rate": 1.011380317270969e-05, "loss": 0.0593, "step": 5186, "task_loss": 0.06781870126724243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943209530551243, "compression_loss": 0.0, "distillation_loss": 0.033147186040878296, "epoch": 4.93, "learning_rate": 1.0105244165137467e-05, "loss": 0.0457, "step": 5187, "task_loss": 0.15893574059009552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943262130550476, "compression_loss": 0.0, "distillation_loss": 0.026804693043231964, "epoch": 4.93, "learning_rate": 1.0096687863269368e-05, "loss": 0.0248, "step": 5188, "task_loss": 0.006701400503516197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943314698060518, "compression_loss": 0.0, "distillation_loss": 0.04605764150619507, "epoch": 4.93, "learning_rate": 1.0088134268659693e-05, "loss": 0.0498, "step": 5189, "task_loss": 0.08354970067739487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794336723309141, "compression_loss": 0.0, "distillation_loss": 0.033246997743844986, "epoch": 4.93, "learning_rate": 1.0079583382862254e-05, "loss": 0.0326, "step": 5190, "task_loss": 0.026343755424022675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943419735653184, "compression_loss": 0.0, "distillation_loss": 0.06967267394065857, "epoch": 4.93, "learning_rate": 1.0071035207430352e-05, "loss": 0.0715, "step": 5191, "task_loss": 0.08810234814882278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943472205755882, "compression_loss": 0.0, "distillation_loss": 0.02802272140979767, "epoch": 4.93, "learning_rate": 1.0062489743916792e-05, "loss": 0.0287, "step": 5192, "task_loss": 0.03505971282720566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943524643409535, "compression_loss": 0.0, "distillation_loss": 0.07618989050388336, "epoch": 4.93, "learning_rate": 1.005394699387392e-05, "loss": 0.0715, "step": 5193, "task_loss": 0.029166080057621002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943577048624184, "compression_loss": 0.0, "distillation_loss": 0.033866822719573975, "epoch": 4.93, "learning_rate": 1.0045406958853556e-05, "loss": 0.0316, "step": 5194, "task_loss": 0.010730434209108353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943629421409865, "compression_loss": 0.0, "distillation_loss": 0.03229956328868866, "epoch": 4.93, "learning_rate": 1.0036869640407031e-05, "loss": 0.0305, "step": 5195, "task_loss": 0.01465969905257225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943681761776614, "compression_loss": 0.0, "distillation_loss": 0.040657661855220795, "epoch": 4.93, "learning_rate": 1.002833504008521e-05, "loss": 0.0535, "step": 5196, "task_loss": 0.16867557168006897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943734069734468, "compression_loss": 0.0, "distillation_loss": 0.12660034000873566, "epoch": 4.94, "learning_rate": 1.0019803159438423e-05, "loss": 0.1192, "step": 5197, "task_loss": 0.05233707278966904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943786345293464, "compression_loss": 0.0, "distillation_loss": 0.24157142639160156, "epoch": 4.94, "learning_rate": 1.0011274000016547e-05, "loss": 0.237, "step": 5198, "task_loss": 0.19573716819286346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943838588463639, "compression_loss": 0.0, "distillation_loss": 0.10286252945661545, "epoch": 4.94, "learning_rate": 1.0002747563368928e-05, "loss": 0.1052, "step": 5199, "task_loss": 0.12575237452983856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794389079925503, "compression_loss": 0.0, "distillation_loss": 0.1292041838169098, "epoch": 4.94, "learning_rate": 9.994223851044449e-06, "loss": 0.1243, "step": 5200, "task_loss": 0.07981815189123154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943942977677673, "compression_loss": 0.0, "distillation_loss": 0.04530710726976395, "epoch": 4.94, "learning_rate": 9.985702864591477e-06, "loss": 0.0477, "step": 5201, "task_loss": 0.06911186873912811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7943995123741605, "compression_loss": 0.0, "distillation_loss": 0.04243285953998566, "epoch": 4.94, "learning_rate": 9.977184605557877e-06, "loss": 0.0385, "step": 5202, "task_loss": 0.003344038501381874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944047237456864, "compression_loss": 0.0, "distillation_loss": 0.01720603182911873, "epoch": 4.94, "learning_rate": 9.968669075491054e-06, "loss": 0.0204, "step": 5203, "task_loss": 0.04868287593126297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944099318833486, "compression_loss": 0.0, "distillation_loss": 0.02283567562699318, "epoch": 4.94, "learning_rate": 9.960156275937887e-06, "loss": 0.0279, "step": 5204, "task_loss": 0.07395943999290466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944151367881507, "compression_loss": 0.0, "distillation_loss": 0.012866092845797539, "epoch": 4.94, "learning_rate": 9.951646208444756e-06, "loss": 0.0119, "step": 5205, "task_loss": 0.0030320733785629272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944203384610965, "compression_loss": 0.0, "distillation_loss": 0.03689184784889221, "epoch": 4.94, "learning_rate": 9.943138874557564e-06, "loss": 0.0415, "step": 5206, "task_loss": 0.0826156958937645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944255369031896, "compression_loss": 0.0, "distillation_loss": 0.01845708116889, "epoch": 4.94, "learning_rate": 9.934634275821722e-06, "loss": 0.0175, "step": 5207, "task_loss": 0.009108863770961761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944307321154338, "compression_loss": 0.0, "distillation_loss": 0.01570175029337406, "epoch": 4.95, "learning_rate": 9.926132413782116e-06, "loss": 0.0147, "step": 5208, "task_loss": 0.006071802228689194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944359240988327, "compression_loss": 0.0, "distillation_loss": 0.013230700977146626, "epoch": 4.95, "learning_rate": 9.917633289983144e-06, "loss": 0.0125, "step": 5209, "task_loss": 0.0057407524436712265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.79444111285439, "compression_loss": 0.0, "distillation_loss": 0.035949304699897766, "epoch": 4.95, "learning_rate": 9.909136905968736e-06, "loss": 0.0461, "step": 5210, "task_loss": 0.13782669603824615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944462983831093, "compression_loss": 0.0, "distillation_loss": 0.03853258118033409, "epoch": 4.95, "learning_rate": 9.900643263282283e-06, "loss": 0.0386, "step": 5211, "task_loss": 0.03873224928975105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944514806859944, "compression_loss": 0.0, "distillation_loss": 0.036589786410331726, "epoch": 4.95, "learning_rate": 9.892152363466691e-06, "loss": 0.036, "step": 5212, "task_loss": 0.03055756166577339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944566597640489, "compression_loss": 0.0, "distillation_loss": 0.03954382613301277, "epoch": 4.95, "learning_rate": 9.883664208064394e-06, "loss": 0.0415, "step": 5213, "task_loss": 0.05872972309589386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944618356182765, "compression_loss": 0.0, "distillation_loss": 0.0241532102227211, "epoch": 4.95, "learning_rate": 9.875178798617286e-06, "loss": 0.0284, "step": 5214, "task_loss": 0.06614520400762558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794467008249681, "compression_loss": 0.0, "distillation_loss": 0.021967854350805283, "epoch": 4.95, "learning_rate": 9.866696136666798e-06, "loss": 0.0208, "step": 5215, "task_loss": 0.010009793564677238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944721776592659, "compression_loss": 0.0, "distillation_loss": 0.02724001184105873, "epoch": 4.95, "learning_rate": 9.858216223753833e-06, "loss": 0.0286, "step": 5216, "task_loss": 0.04124392196536064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794477343848035, "compression_loss": 0.0, "distillation_loss": 0.027787886559963226, "epoch": 4.95, "learning_rate": 9.849739061418823e-06, "loss": 0.0329, "step": 5217, "task_loss": 0.07907851040363312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794482506816992, "compression_loss": 0.0, "distillation_loss": 0.08973988890647888, "epoch": 4.96, "learning_rate": 9.841264651201676e-06, "loss": 0.1021, "step": 5218, "task_loss": 0.21334464848041534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944876665671404, "compression_loss": 0.0, "distillation_loss": 0.06084804609417915, "epoch": 4.96, "learning_rate": 9.832792994641801e-06, "loss": 0.0676, "step": 5219, "task_loss": 0.12829428911209106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944928230994841, "compression_loss": 0.0, "distillation_loss": 0.01122710295021534, "epoch": 4.96, "learning_rate": 9.824324093278134e-06, "loss": 0.0247, "step": 5220, "task_loss": 0.1459704488515854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7944979764150266, "compression_loss": 0.0, "distillation_loss": 0.06464344263076782, "epoch": 4.96, "learning_rate": 9.815857948649082e-06, "loss": 0.0686, "step": 5221, "task_loss": 0.10410156100988388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945031265147717, "compression_loss": 0.0, "distillation_loss": 0.01737487129867077, "epoch": 4.96, "learning_rate": 9.807394562292551e-06, "loss": 0.0214, "step": 5222, "task_loss": 0.05750443786382675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945082733997232, "compression_loss": 0.0, "distillation_loss": 0.04283272475004196, "epoch": 4.96, "learning_rate": 9.798933935745968e-06, "loss": 0.0468, "step": 5223, "task_loss": 0.08252625167369843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945134170708845, "compression_loss": 0.0, "distillation_loss": 0.029033223167061806, "epoch": 4.96, "learning_rate": 9.790476070546257e-06, "loss": 0.0299, "step": 5224, "task_loss": 0.03801552951335907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945185575292595, "compression_loss": 0.0, "distillation_loss": 0.03287568688392639, "epoch": 4.96, "learning_rate": 9.782020968229813e-06, "loss": 0.038, "step": 5225, "task_loss": 0.08447042852640152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945236947758517, "compression_loss": 0.0, "distillation_loss": 0.05485154315829277, "epoch": 4.96, "learning_rate": 9.77356863033254e-06, "loss": 0.0504, "step": 5226, "task_loss": 0.010427333414554596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794528828811665, "compression_loss": 0.0, "distillation_loss": 0.030403347685933113, "epoch": 4.96, "learning_rate": 9.765119058389872e-06, "loss": 0.0276, "step": 5227, "task_loss": 0.0026769302785396576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945339596377028, "compression_loss": 0.0, "distillation_loss": 0.023587405681610107, "epoch": 4.96, "learning_rate": 9.756672253936694e-06, "loss": 0.0219, "step": 5228, "task_loss": 0.007051067426800728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945390872549691, "compression_loss": 0.0, "distillation_loss": 0.13783934712409973, "epoch": 4.97, "learning_rate": 9.748228218507408e-06, "loss": 0.1264, "step": 5229, "task_loss": 0.02342773601412773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945442116644673, "compression_loss": 0.0, "distillation_loss": 0.02738800458610058, "epoch": 4.97, "learning_rate": 9.739786953635924e-06, "loss": 0.0255, "step": 5230, "task_loss": 0.008501600474119186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945493328672013, "compression_loss": 0.0, "distillation_loss": 0.1387009620666504, "epoch": 4.97, "learning_rate": 9.731348460855638e-06, "loss": 0.1468, "step": 5231, "task_loss": 0.21995283663272858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945544508641748, "compression_loss": 0.0, "distillation_loss": 0.02429133839905262, "epoch": 4.97, "learning_rate": 9.722912741699427e-06, "loss": 0.0272, "step": 5232, "task_loss": 0.05331313982605934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945595656563912, "compression_loss": 0.0, "distillation_loss": 0.046510256826877594, "epoch": 4.97, "learning_rate": 9.714479797699694e-06, "loss": 0.0428, "step": 5233, "task_loss": 0.009763523936271667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945646772448545, "compression_loss": 0.0, "distillation_loss": 0.025538455694913864, "epoch": 4.97, "learning_rate": 9.706049630388329e-06, "loss": 0.0352, "step": 5234, "task_loss": 0.12184809148311615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945697856305681, "compression_loss": 0.0, "distillation_loss": 0.11467162519693375, "epoch": 4.97, "learning_rate": 9.697622241296705e-06, "loss": 0.1134, "step": 5235, "task_loss": 0.10236775875091553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945748908145359, "compression_loss": 0.0, "distillation_loss": 0.0173861812800169, "epoch": 4.97, "learning_rate": 9.68919763195569e-06, "loss": 0.0161, "step": 5236, "task_loss": 0.004365898668766022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945799927977615, "compression_loss": 0.0, "distillation_loss": 0.014998473227024078, "epoch": 4.97, "learning_rate": 9.680775803895672e-06, "loss": 0.0141, "step": 5237, "task_loss": 0.005868380889296532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945850915812486, "compression_loss": 0.0, "distillation_loss": 0.04273030161857605, "epoch": 4.97, "learning_rate": 9.672356758646506e-06, "loss": 0.0546, "step": 5238, "task_loss": 0.1609753668308258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945901871660008, "compression_loss": 0.0, "distillation_loss": 0.03701045364141464, "epoch": 4.98, "learning_rate": 9.663940497737556e-06, "loss": 0.0345, "step": 5239, "task_loss": 0.01145661249756813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7945952795530219, "compression_loss": 0.0, "distillation_loss": 0.01707928627729416, "epoch": 4.98, "learning_rate": 9.655527022697652e-06, "loss": 0.0161, "step": 5240, "task_loss": 0.00770198181271553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946003687433155, "compression_loss": 0.0, "distillation_loss": 0.034336067736148834, "epoch": 4.98, "learning_rate": 9.647116335055187e-06, "loss": 0.0393, "step": 5241, "task_loss": 0.08418752998113632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946054547378854, "compression_loss": 0.0, "distillation_loss": 0.026298392564058304, "epoch": 4.98, "learning_rate": 9.638708436337976e-06, "loss": 0.0248, "step": 5242, "task_loss": 0.01096322201192379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946105375377351, "compression_loss": 0.0, "distillation_loss": 0.03858939930796623, "epoch": 4.98, "learning_rate": 9.630303328073352e-06, "loss": 0.0371, "step": 5243, "task_loss": 0.02343849278986454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946156171438685, "compression_loss": 0.0, "distillation_loss": 0.029876116663217545, "epoch": 4.98, "learning_rate": 9.621901011788157e-06, "loss": 0.0363, "step": 5244, "task_loss": 0.09448274224996567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794620693557289, "compression_loss": 0.0, "distillation_loss": 0.07018247991800308, "epoch": 4.98, "learning_rate": 9.613501489008705e-06, "loss": 0.08, "step": 5245, "task_loss": 0.1685934066772461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946257667790007, "compression_loss": 0.0, "distillation_loss": 0.08023358136415482, "epoch": 4.98, "learning_rate": 9.605104761260809e-06, "loss": 0.0914, "step": 5246, "task_loss": 0.19211246073246002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946308368100068, "compression_loss": 0.0, "distillation_loss": 0.020654667168855667, "epoch": 4.98, "learning_rate": 9.596710830069769e-06, "loss": 0.0201, "step": 5247, "task_loss": 0.014910630881786346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946359036513112, "compression_loss": 0.0, "distillation_loss": 0.07811079919338226, "epoch": 4.98, "learning_rate": 9.588319696960396e-06, "loss": 0.0898, "step": 5248, "task_loss": 0.19523471593856812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946409673039178, "compression_loss": 0.0, "distillation_loss": 0.14166250824928284, "epoch": 4.98, "learning_rate": 9.579931363456968e-06, "loss": 0.1623, "step": 5249, "task_loss": 0.3483670949935913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946460277688299, "compression_loss": 0.0, "distillation_loss": 0.11959753930568695, "epoch": 4.99, "learning_rate": 9.571545831083267e-06, "loss": 0.1251, "step": 5250, "task_loss": 0.17435845732688904 }, { "epoch": 4.99, "eval_accuracy": 0.9013761467889908, "eval_loss": 0.40377190709114075, "eval_runtime": 18.2101, "eval_samples_per_second": 47.886, "eval_steps_per_second": 5.986, "step": 5250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946510850470513, "compression_loss": 0.0, "distillation_loss": 0.09491365402936935, "epoch": 4.99, "learning_rate": 9.563163101362582e-06, "loss": 0.0985, "step": 5251, "task_loss": 0.13050074875354767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946561391395859, "compression_loss": 0.0, "distillation_loss": 0.043609265238046646, "epoch": 4.99, "learning_rate": 9.554783175817667e-06, "loss": 0.0419, "step": 5252, "task_loss": 0.0262824147939682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946611900474371, "compression_loss": 0.0, "distillation_loss": 0.03674563765525818, "epoch": 4.99, "learning_rate": 9.546406055970769e-06, "loss": 0.0376, "step": 5253, "task_loss": 0.045376792550086975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946662377716087, "compression_loss": 0.0, "distillation_loss": 0.015710389241576195, "epoch": 4.99, "learning_rate": 9.538031743343628e-06, "loss": 0.0144, "step": 5254, "task_loss": 0.0030107852071523666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946712823131045, "compression_loss": 0.0, "distillation_loss": 0.018925126641988754, "epoch": 4.99, "learning_rate": 9.529660239457497e-06, "loss": 0.0175, "step": 5255, "task_loss": 0.005088077858090401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.794676323672928, "compression_loss": 0.0, "distillation_loss": 0.08420512825250626, "epoch": 4.99, "learning_rate": 9.521291545833086e-06, "loss": 0.0801, "step": 5256, "task_loss": 0.0430089607834816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946813618520829, "compression_loss": 0.0, "distillation_loss": 0.019614659249782562, "epoch": 4.99, "learning_rate": 9.512925663990608e-06, "loss": 0.0183, "step": 5257, "task_loss": 0.006246289238333702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946863968515729, "compression_loss": 0.0, "distillation_loss": 0.029730752110481262, "epoch": 4.99, "learning_rate": 9.504562595449764e-06, "loss": 0.0362, "step": 5258, "task_loss": 0.0940956100821495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946914286724018, "compression_loss": 0.0, "distillation_loss": 0.1385657787322998, "epoch": 4.99, "learning_rate": 9.496202341729765e-06, "loss": 0.1312, "step": 5259, "task_loss": 0.06457454711198807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7946964573155733, "compression_loss": 0.0, "distillation_loss": 0.02025190182030201, "epoch": 5.0, "learning_rate": 9.487844904349274e-06, "loss": 0.0327, "step": 5260, "task_loss": 0.1445349156856537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7947014827820907, "compression_loss": 0.0, "distillation_loss": 0.119037926197052, "epoch": 5.0, "learning_rate": 9.479490284826459e-06, "loss": 0.1162, "step": 5261, "task_loss": 0.09018512070178986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7947065050729581, "compression_loss": 0.0, "distillation_loss": 0.054691120982170105, "epoch": 5.0, "learning_rate": 9.471138484678987e-06, "loss": 0.0638, "step": 5262, "task_loss": 0.14550621807575226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7947115241891791, "compression_loss": 0.0, "distillation_loss": 0.1421840786933899, "epoch": 5.0, "learning_rate": 9.462789505423997e-06, "loss": 0.1377, "step": 5263, "task_loss": 0.09778082370758057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7947165401317573, "compression_loss": 0.0, "distillation_loss": 0.039873864501714706, "epoch": 5.0, "learning_rate": 9.454443348578115e-06, "loss": 0.0551, "step": 5264, "task_loss": 0.19219006597995758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6151754920258597, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7874999929845908, "compression/magnitude_sparsity/target_sparsity_level": 0.7947215529016963, "compression_loss": 0.0, "distillation_loss": 0.021231336519122124, "epoch": 5.0, "learning_rate": 9.446100015657474e-06, "loss": 0.0198, "step": 5265, "task_loss": 0.007257444318383932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947265625000001, "compression_loss": 0.0, "distillation_loss": 0.08732712268829346, "epoch": 5.0, "learning_rate": 9.437759508177665e-06, "loss": 0.0898, "step": 5266, "task_loss": 0.11206232756376266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947315689276719, "compression_loss": 0.0, "distillation_loss": 0.2404271364212036, "epoch": 5.0, "learning_rate": 9.429421827653798e-06, "loss": 0.2203, "step": 5267, "task_loss": 0.03922963887453079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947365721857158, "compression_loss": 0.0, "distillation_loss": 0.48191341757774353, "epoch": 5.0, "learning_rate": 9.421086975600438e-06, "loss": 0.484, "step": 5268, "task_loss": 0.5028781294822693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947415722751353, "compression_loss": 0.0, "distillation_loss": 0.2334894835948944, "epoch": 5.0, "learning_rate": 9.412754953531663e-06, "loss": 0.2161, "step": 5269, "task_loss": 0.059303127229213715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947465691969342, "compression_loss": 0.0, "distillation_loss": 0.17375622689723969, "epoch": 5.0, "learning_rate": 9.404425762961022e-06, "loss": 0.1641, "step": 5270, "task_loss": 0.07717826217412949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.794751562952116, "compression_loss": 0.0, "distillation_loss": 0.40218544006347656, "epoch": 5.01, "learning_rate": 9.396099405401543e-06, "loss": 0.3844, "step": 5271, "task_loss": 0.22383946180343628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947565535416845, "compression_loss": 0.0, "distillation_loss": 0.28495174646377563, "epoch": 5.01, "learning_rate": 9.387775882365763e-06, "loss": 0.2911, "step": 5272, "task_loss": 0.34688353538513184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947615409666433, "compression_loss": 0.0, "distillation_loss": 0.2111060917377472, "epoch": 5.01, "learning_rate": 9.379455195365684e-06, "loss": 0.2001, "step": 5273, "task_loss": 0.10126888751983643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947665252279963, "compression_loss": 0.0, "distillation_loss": 0.15187914669513702, "epoch": 5.01, "learning_rate": 9.371137345912786e-06, "loss": 0.1592, "step": 5274, "task_loss": 0.225071519613266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947715063267469, "compression_loss": 0.0, "distillation_loss": 0.22621603310108185, "epoch": 5.01, "learning_rate": 9.362822335518063e-06, "loss": 0.2191, "step": 5275, "task_loss": 0.15458367764949799 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947764842638989, "compression_loss": 0.0, "distillation_loss": 0.11143049597740173, "epoch": 5.01, "learning_rate": 9.35451016569198e-06, "loss": 0.1072, "step": 5276, "task_loss": 0.06862667948007584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.794781459040456, "compression_loss": 0.0, "distillation_loss": 0.070254385471344, "epoch": 5.01, "learning_rate": 9.346200837944474e-06, "loss": 0.0712, "step": 5277, "task_loss": 0.07974936068058014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947864306574219, "compression_loss": 0.0, "distillation_loss": 0.04427797347307205, "epoch": 5.01, "learning_rate": 9.337894353784968e-06, "loss": 0.04, "step": 5278, "task_loss": 0.0014720112085342407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947913991158003, "compression_loss": 0.0, "distillation_loss": 0.23665665090084076, "epoch": 5.01, "learning_rate": 9.329590714722388e-06, "loss": 0.2457, "step": 5279, "task_loss": 0.32665759325027466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7947963644165947, "compression_loss": 0.0, "distillation_loss": 0.18532395362854004, "epoch": 5.01, "learning_rate": 9.321289922265124e-06, "loss": 0.1669, "step": 5280, "task_loss": 0.0015691444277763367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.794801326560809, "compression_loss": 0.0, "distillation_loss": 0.20857329666614532, "epoch": 5.02, "learning_rate": 9.312991977921051e-06, "loss": 0.198, "step": 5281, "task_loss": 0.10306287556886673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948062855494469, "compression_loss": 0.0, "distillation_loss": 0.11233891546726227, "epoch": 5.02, "learning_rate": 9.304696883197542e-06, "loss": 0.1079, "step": 5282, "task_loss": 0.06844587624073029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948112413835119, "compression_loss": 0.0, "distillation_loss": 0.15439686179161072, "epoch": 5.02, "learning_rate": 9.29640463960143e-06, "loss": 0.1511, "step": 5283, "task_loss": 0.12120820581912994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948161940640077, "compression_loss": 0.0, "distillation_loss": 0.07337729632854462, "epoch": 5.02, "learning_rate": 9.28811524863904e-06, "loss": 0.0865, "step": 5284, "task_loss": 0.2042730152606964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948211435919381, "compression_loss": 0.0, "distillation_loss": 0.041931264102458954, "epoch": 5.02, "learning_rate": 9.279828711816182e-06, "loss": 0.0454, "step": 5285, "task_loss": 0.0765017569065094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948260899683067, "compression_loss": 0.0, "distillation_loss": 0.21245259046554565, "epoch": 5.02, "learning_rate": 9.271545030638156e-06, "loss": 0.2016, "step": 5286, "task_loss": 0.10362794995307922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948310331941172, "compression_loss": 0.0, "distillation_loss": 0.06203429400920868, "epoch": 5.02, "learning_rate": 9.263264206609726e-06, "loss": 0.0577, "step": 5287, "task_loss": 0.018689151853322983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948359732703734, "compression_loss": 0.0, "distillation_loss": 0.07664155215024948, "epoch": 5.02, "learning_rate": 9.254986241235131e-06, "loss": 0.0782, "step": 5288, "task_loss": 0.09256763756275177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948409101980788, "compression_loss": 0.0, "distillation_loss": 0.1382894665002823, "epoch": 5.02, "learning_rate": 9.246711136018124e-06, "loss": 0.1313, "step": 5289, "task_loss": 0.06794591248035431 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948458439782371, "compression_loss": 0.0, "distillation_loss": 0.18572860956192017, "epoch": 5.02, "learning_rate": 9.23843889246191e-06, "loss": 0.1894, "step": 5290, "task_loss": 0.22227227687835693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.794850774611852, "compression_loss": 0.0, "distillation_loss": 0.11491254717111588, "epoch": 5.02, "learning_rate": 9.230169512069168e-06, "loss": 0.1096, "step": 5291, "task_loss": 0.061589665710926056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948557020999274, "compression_loss": 0.0, "distillation_loss": 0.03495645150542259, "epoch": 5.03, "learning_rate": 9.221902996342084e-06, "loss": 0.0318, "step": 5292, "task_loss": 0.0034442245960235596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948606264434667, "compression_loss": 0.0, "distillation_loss": 0.32723939418792725, "epoch": 5.03, "learning_rate": 9.213639346782315e-06, "loss": 0.3206, "step": 5293, "task_loss": 0.261165052652359 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948655476434737, "compression_loss": 0.0, "distillation_loss": 0.03939376026391983, "epoch": 5.03, "learning_rate": 9.20537856489099e-06, "loss": 0.0538, "step": 5294, "task_loss": 0.18324826657772064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948704657009521, "compression_loss": 0.0, "distillation_loss": 0.14817285537719727, "epoch": 5.03, "learning_rate": 9.197120652168706e-06, "loss": 0.1403, "step": 5295, "task_loss": 0.06911684572696686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948753806169055, "compression_loss": 0.0, "distillation_loss": 0.0447382926940918, "epoch": 5.03, "learning_rate": 9.18886561011557e-06, "loss": 0.0421, "step": 5296, "task_loss": 0.01793717034161091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948802923923376, "compression_loss": 0.0, "distillation_loss": 0.05252248793840408, "epoch": 5.03, "learning_rate": 9.180613440231145e-06, "loss": 0.0492, "step": 5297, "task_loss": 0.019047562032938004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948852010282522, "compression_loss": 0.0, "distillation_loss": 0.10433128476142883, "epoch": 5.03, "learning_rate": 9.172364144014466e-06, "loss": 0.1018, "step": 5298, "task_loss": 0.07895547151565552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948901065256528, "compression_loss": 0.0, "distillation_loss": 0.04161163419485092, "epoch": 5.03, "learning_rate": 9.164117722964077e-06, "loss": 0.0458, "step": 5299, "task_loss": 0.0835949033498764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948950088855433, "compression_loss": 0.0, "distillation_loss": 0.1584656685590744, "epoch": 5.03, "learning_rate": 9.155874178577973e-06, "loss": 0.1606, "step": 5300, "task_loss": 0.18005318939685822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7948999081089272, "compression_loss": 0.0, "distillation_loss": 0.16821911931037903, "epoch": 5.03, "learning_rate": 9.147633512353618e-06, "loss": 0.1554, "step": 5301, "task_loss": 0.04032587260007858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949048041968082, "compression_loss": 0.0, "distillation_loss": 0.046078503131866455, "epoch": 5.04, "learning_rate": 9.139395725787981e-06, "loss": 0.0506, "step": 5302, "task_loss": 0.09137886017560959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79490969715019, "compression_loss": 0.0, "distillation_loss": 0.1656574308872223, "epoch": 5.04, "learning_rate": 9.131160820377505e-06, "loss": 0.1672, "step": 5303, "task_loss": 0.18076612055301666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949145869700764, "compression_loss": 0.0, "distillation_loss": 0.159349262714386, "epoch": 5.04, "learning_rate": 9.12292879761809e-06, "loss": 0.1652, "step": 5304, "task_loss": 0.21749325096607208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.794919473657471, "compression_loss": 0.0, "distillation_loss": 0.09572987258434296, "epoch": 5.04, "learning_rate": 9.11469965900512e-06, "loss": 0.1055, "step": 5305, "task_loss": 0.19356384873390198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949243572133774, "compression_loss": 0.0, "distillation_loss": 0.06085019186139107, "epoch": 5.04, "learning_rate": 9.106473406033464e-06, "loss": 0.0755, "step": 5306, "task_loss": 0.2072937786579132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949292376387993, "compression_loss": 0.0, "distillation_loss": 0.09842538833618164, "epoch": 5.04, "learning_rate": 9.098250040197462e-06, "loss": 0.113, "step": 5307, "task_loss": 0.24399572610855103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949341149347405, "compression_loss": 0.0, "distillation_loss": 0.1453973799943924, "epoch": 5.04, "learning_rate": 9.090029562990911e-06, "loss": 0.1474, "step": 5308, "task_loss": 0.16563892364501953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949389891022046, "compression_loss": 0.0, "distillation_loss": 0.04174884408712387, "epoch": 5.04, "learning_rate": 9.081811975907126e-06, "loss": 0.0491, "step": 5309, "task_loss": 0.11511567234992981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949438601421954, "compression_loss": 0.0, "distillation_loss": 0.02893233485519886, "epoch": 5.04, "learning_rate": 9.07359728043885e-06, "loss": 0.0264, "step": 5310, "task_loss": 0.003949105739593506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949487280557164, "compression_loss": 0.0, "distillation_loss": 0.048204317688941956, "epoch": 5.04, "learning_rate": 9.065385478078337e-06, "loss": 0.0441, "step": 5311, "task_loss": 0.007329225540161133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949535928437713, "compression_loss": 0.0, "distillation_loss": 0.043466269969940186, "epoch": 5.04, "learning_rate": 9.057176570317285e-06, "loss": 0.0406, "step": 5312, "task_loss": 0.015056092292070389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949584545073639, "compression_loss": 0.0, "distillation_loss": 0.21660102903842926, "epoch": 5.05, "learning_rate": 9.0489705586469e-06, "loss": 0.2129, "step": 5313, "task_loss": 0.17953038215637207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949633130474979, "compression_loss": 0.0, "distillation_loss": 0.11881037801504135, "epoch": 5.05, "learning_rate": 9.040767444557836e-06, "loss": 0.1213, "step": 5314, "task_loss": 0.14401260018348694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949681684651768, "compression_loss": 0.0, "distillation_loss": 0.11176039278507233, "epoch": 5.05, "learning_rate": 9.032567229540212e-06, "loss": 0.1134, "step": 5315, "task_loss": 0.12841668725013733 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949730207614044, "compression_loss": 0.0, "distillation_loss": 0.14567351341247559, "epoch": 5.05, "learning_rate": 9.024369915083664e-06, "loss": 0.1399, "step": 5316, "task_loss": 0.0879807248711586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949778699371844, "compression_loss": 0.0, "distillation_loss": 0.05770561844110489, "epoch": 5.05, "learning_rate": 9.01617550267726e-06, "loss": 0.0608, "step": 5317, "task_loss": 0.08822789788246155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949827159935204, "compression_loss": 0.0, "distillation_loss": 0.10660059005022049, "epoch": 5.05, "learning_rate": 9.007983993809543e-06, "loss": 0.0995, "step": 5318, "task_loss": 0.03520110249519348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949875589314163, "compression_loss": 0.0, "distillation_loss": 0.06890207529067993, "epoch": 5.05, "learning_rate": 8.999795389968554e-06, "loss": 0.0647, "step": 5319, "task_loss": 0.027203164994716644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949923987518754, "compression_loss": 0.0, "distillation_loss": 0.045612677931785583, "epoch": 5.05, "learning_rate": 8.9916096926418e-06, "loss": 0.048, "step": 5320, "task_loss": 0.06988806277513504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7949972354559018, "compression_loss": 0.0, "distillation_loss": 0.054146356880664825, "epoch": 5.05, "learning_rate": 8.983426903316242e-06, "loss": 0.0589, "step": 5321, "task_loss": 0.10151663422584534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950020690444989, "compression_loss": 0.0, "distillation_loss": 0.09348467737436295, "epoch": 5.05, "learning_rate": 8.975247023478314e-06, "loss": 0.0887, "step": 5322, "task_loss": 0.045579515397548676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950068995186704, "compression_loss": 0.0, "distillation_loss": 0.04567846655845642, "epoch": 5.06, "learning_rate": 8.967070054613949e-06, "loss": 0.048, "step": 5323, "task_loss": 0.06916110217571259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950117268794201, "compression_loss": 0.0, "distillation_loss": 0.03903430327773094, "epoch": 5.06, "learning_rate": 8.958895998208524e-06, "loss": 0.0357, "step": 5324, "task_loss": 0.005971364676952362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950165511277517, "compression_loss": 0.0, "distillation_loss": 0.16417047381401062, "epoch": 5.06, "learning_rate": 8.95072485574689e-06, "loss": 0.165, "step": 5325, "task_loss": 0.17254981398582458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950213722646687, "compression_loss": 0.0, "distillation_loss": 0.05708067864179611, "epoch": 5.06, "learning_rate": 8.942556628713386e-06, "loss": 0.0848, "step": 5326, "task_loss": 0.33394309878349304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795026190291175, "compression_loss": 0.0, "distillation_loss": 0.10749347507953644, "epoch": 5.06, "learning_rate": 8.934391318591794e-06, "loss": 0.1102, "step": 5327, "task_loss": 0.13443301618099213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950310052082741, "compression_loss": 0.0, "distillation_loss": 0.07780588418245316, "epoch": 5.06, "learning_rate": 8.926228926865404e-06, "loss": 0.0898, "step": 5328, "task_loss": 0.19749955832958221 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950358170169698, "compression_loss": 0.0, "distillation_loss": 0.1407744139432907, "epoch": 5.06, "learning_rate": 8.918069455016931e-06, "loss": 0.1353, "step": 5329, "task_loss": 0.08600475639104843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950406257182657, "compression_loss": 0.0, "distillation_loss": 0.09022311121225357, "epoch": 5.06, "learning_rate": 8.9099129045286e-06, "loss": 0.0966, "step": 5330, "task_loss": 0.15400820970535278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950454313131655, "compression_loss": 0.0, "distillation_loss": 0.18497398495674133, "epoch": 5.06, "learning_rate": 8.901759276882082e-06, "loss": 0.2032, "step": 5331, "task_loss": 0.36690419912338257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795050233802673, "compression_loss": 0.0, "distillation_loss": 0.04657813161611557, "epoch": 5.06, "learning_rate": 8.893608573558515e-06, "loss": 0.0449, "step": 5332, "task_loss": 0.029313866049051285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950550331877917, "compression_loss": 0.0, "distillation_loss": 0.22918027639389038, "epoch": 5.06, "learning_rate": 8.885460796038525e-06, "loss": 0.2344, "step": 5333, "task_loss": 0.2814333438873291 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950598294695255, "compression_loss": 0.0, "distillation_loss": 0.02877352014183998, "epoch": 5.07, "learning_rate": 8.877315945802191e-06, "loss": 0.0269, "step": 5334, "task_loss": 0.010271269828081131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950646226488778, "compression_loss": 0.0, "distillation_loss": 0.039790619164705276, "epoch": 5.07, "learning_rate": 8.869174024329063e-06, "loss": 0.0367, "step": 5335, "task_loss": 0.00851232185959816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950694127268525, "compression_loss": 0.0, "distillation_loss": 0.05201128497719765, "epoch": 5.07, "learning_rate": 8.861035033098144e-06, "loss": 0.0603, "step": 5336, "task_loss": 0.13527747988700867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950741997044531, "compression_loss": 0.0, "distillation_loss": 0.02241450734436512, "epoch": 5.07, "learning_rate": 8.852898973587953e-06, "loss": 0.0208, "step": 5337, "task_loss": 0.005801070481538773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950789835826836, "compression_loss": 0.0, "distillation_loss": 0.12234519422054291, "epoch": 5.07, "learning_rate": 8.844765847276432e-06, "loss": 0.1178, "step": 5338, "task_loss": 0.07687139511108398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950837643625474, "compression_loss": 0.0, "distillation_loss": 0.042783450335264206, "epoch": 5.07, "learning_rate": 8.836635655640988e-06, "loss": 0.0391, "step": 5339, "task_loss": 0.0057541318237781525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950885420450482, "compression_loss": 0.0, "distillation_loss": 0.20203274488449097, "epoch": 5.07, "learning_rate": 8.828508400158531e-06, "loss": 0.1948, "step": 5340, "task_loss": 0.13001954555511475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950933166311898, "compression_loss": 0.0, "distillation_loss": 0.03790685907006264, "epoch": 5.07, "learning_rate": 8.820384082305403e-06, "loss": 0.0417, "step": 5341, "task_loss": 0.07593842595815659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7950980881219757, "compression_loss": 0.0, "distillation_loss": 0.02947581559419632, "epoch": 5.07, "learning_rate": 8.812262703557431e-06, "loss": 0.027, "step": 5342, "task_loss": 0.004847317934036255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951028565184098, "compression_loss": 0.0, "distillation_loss": 0.03460695222020149, "epoch": 5.07, "learning_rate": 8.804144265389888e-06, "loss": 0.0316, "step": 5343, "task_loss": 0.004228517413139343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951076218214957, "compression_loss": 0.0, "distillation_loss": 0.1003609448671341, "epoch": 5.08, "learning_rate": 8.796028769277542e-06, "loss": 0.0966, "step": 5344, "task_loss": 0.06230350583791733 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951123840322372, "compression_loss": 0.0, "distillation_loss": 0.06244899705052376, "epoch": 5.08, "learning_rate": 8.78791621669462e-06, "loss": 0.0731, "step": 5345, "task_loss": 0.16876111924648285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951171431516376, "compression_loss": 0.0, "distillation_loss": 0.030795224010944366, "epoch": 5.08, "learning_rate": 8.77980660911479e-06, "loss": 0.0394, "step": 5346, "task_loss": 0.11702409386634827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795121899180701, "compression_loss": 0.0, "distillation_loss": 0.12711569666862488, "epoch": 5.08, "learning_rate": 8.771699948011202e-06, "loss": 0.1325, "step": 5347, "task_loss": 0.18047887086868286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951266521204309, "compression_loss": 0.0, "distillation_loss": 0.05044536665081978, "epoch": 5.08, "learning_rate": 8.763596234856482e-06, "loss": 0.0528, "step": 5348, "task_loss": 0.07405995577573776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795131401971831, "compression_loss": 0.0, "distillation_loss": 0.08389607816934586, "epoch": 5.08, "learning_rate": 8.7554954711227e-06, "loss": 0.0782, "step": 5349, "task_loss": 0.02667861431837082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951361487359049, "compression_loss": 0.0, "distillation_loss": 0.06966320425271988, "epoch": 5.08, "learning_rate": 8.747397658281395e-06, "loss": 0.0719, "step": 5350, "task_loss": 0.09200514107942581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951408924136564, "compression_loss": 0.0, "distillation_loss": 0.04684102535247803, "epoch": 5.08, "learning_rate": 8.739302797803585e-06, "loss": 0.0432, "step": 5351, "task_loss": 0.010602116584777832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951456330060892, "compression_loss": 0.0, "distillation_loss": 0.024965034797787666, "epoch": 5.08, "learning_rate": 8.731210891159736e-06, "loss": 0.0232, "step": 5352, "task_loss": 0.0075826942920684814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951503705142069, "compression_loss": 0.0, "distillation_loss": 0.04389042407274246, "epoch": 5.08, "learning_rate": 8.72312193981977e-06, "loss": 0.0407, "step": 5353, "task_loss": 0.011852305382490158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951551049390132, "compression_loss": 0.0, "distillation_loss": 0.1792694330215454, "epoch": 5.08, "learning_rate": 8.715035945253095e-06, "loss": 0.1675, "step": 5354, "task_loss": 0.06159057095646858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951598362815118, "compression_loss": 0.0, "distillation_loss": 0.07054481655359268, "epoch": 5.09, "learning_rate": 8.706952908928576e-06, "loss": 0.0744, "step": 5355, "task_loss": 0.1091315969824791 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951645645427063, "compression_loss": 0.0, "distillation_loss": 0.07862917333841324, "epoch": 5.09, "learning_rate": 8.69887283231453e-06, "loss": 0.0713, "step": 5356, "task_loss": 0.005761042237281799 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951692897236006, "compression_loss": 0.0, "distillation_loss": 0.07857898622751236, "epoch": 5.09, "learning_rate": 8.690795716878733e-06, "loss": 0.0842, "step": 5357, "task_loss": 0.1351117193698883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951740118251982, "compression_loss": 0.0, "distillation_loss": 0.025391805917024612, "epoch": 5.09, "learning_rate": 8.68272156408845e-06, "loss": 0.0238, "step": 5358, "task_loss": 0.00902603566646576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951787308485028, "compression_loss": 0.0, "distillation_loss": 0.033960044384002686, "epoch": 5.09, "learning_rate": 8.67465037541038e-06, "loss": 0.0314, "step": 5359, "task_loss": 0.008060634136199951 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795183446794518, "compression_loss": 0.0, "distillation_loss": 0.015219409950077534, "epoch": 5.09, "learning_rate": 8.666582152310681e-06, "loss": 0.0192, "step": 5360, "task_loss": 0.05521482601761818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951881596642477, "compression_loss": 0.0, "distillation_loss": 0.04451043903827667, "epoch": 5.09, "learning_rate": 8.658516896255008e-06, "loss": 0.0419, "step": 5361, "task_loss": 0.01849602535367012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7951928694586955, "compression_loss": 0.0, "distillation_loss": 0.10168831050395966, "epoch": 5.09, "learning_rate": 8.650454608708434e-06, "loss": 0.1097, "step": 5362, "task_loss": 0.18197688460350037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795197576178865, "compression_loss": 0.0, "distillation_loss": 0.015219754539430141, "epoch": 5.09, "learning_rate": 8.642395291135527e-06, "loss": 0.014, "step": 5363, "task_loss": 0.003273945301771164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952022798257599, "compression_loss": 0.0, "distillation_loss": 0.03735918551683426, "epoch": 5.09, "learning_rate": 8.634338945000287e-06, "loss": 0.0442, "step": 5364, "task_loss": 0.10612202435731888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795206980400384, "compression_loss": 0.0, "distillation_loss": 0.0819498747587204, "epoch": 5.09, "learning_rate": 8.626285571766204e-06, "loss": 0.1011, "step": 5365, "task_loss": 0.27321359515190125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952116779037407, "compression_loss": 0.0, "distillation_loss": 0.036407940089702606, "epoch": 5.1, "learning_rate": 8.618235172896203e-06, "loss": 0.0459, "step": 5366, "task_loss": 0.13110488653182983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952163723368341, "compression_loss": 0.0, "distillation_loss": 0.03413936868309975, "epoch": 5.1, "learning_rate": 8.61018774985267e-06, "loss": 0.0388, "step": 5367, "task_loss": 0.08028659969568253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952210637006675, "compression_loss": 0.0, "distillation_loss": 0.026723839342594147, "epoch": 5.1, "learning_rate": 8.602143304097477e-06, "loss": 0.0248, "step": 5368, "task_loss": 0.007091682404279709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952257519962448, "compression_loss": 0.0, "distillation_loss": 0.07885417342185974, "epoch": 5.1, "learning_rate": 8.594101837091922e-06, "loss": 0.0772, "step": 5369, "task_loss": 0.06278681010007858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952304372245697, "compression_loss": 0.0, "distillation_loss": 0.19364023208618164, "epoch": 5.1, "learning_rate": 8.586063350296775e-06, "loss": 0.1925, "step": 5370, "task_loss": 0.18187300860881805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952351193866457, "compression_loss": 0.0, "distillation_loss": 0.02647271379828453, "epoch": 5.1, "learning_rate": 8.578027845172269e-06, "loss": 0.0321, "step": 5371, "task_loss": 0.08231362700462341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952397984834766, "compression_loss": 0.0, "distillation_loss": 0.04869615286588669, "epoch": 5.1, "learning_rate": 8.569995323178102e-06, "loss": 0.0519, "step": 5372, "task_loss": 0.08067583292722702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952444745160662, "compression_loss": 0.0, "distillation_loss": 0.017870426177978516, "epoch": 5.1, "learning_rate": 8.561965785773413e-06, "loss": 0.0165, "step": 5373, "task_loss": 0.004511706531047821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952491474854179, "compression_loss": 0.0, "distillation_loss": 0.010581446811556816, "epoch": 5.1, "learning_rate": 8.553939234416796e-06, "loss": 0.0097, "step": 5374, "task_loss": 0.0022147856652736664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952538173925355, "compression_loss": 0.0, "distillation_loss": 0.04116586595773697, "epoch": 5.1, "learning_rate": 8.545915670566331e-06, "loss": 0.0379, "step": 5375, "task_loss": 0.008842799812555313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952584842384228, "compression_loss": 0.0, "distillation_loss": 0.2459995001554489, "epoch": 5.11, "learning_rate": 8.53789509567953e-06, "loss": 0.2456, "step": 5376, "task_loss": 0.24173244833946228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952631480240835, "compression_loss": 0.0, "distillation_loss": 0.09949977695941925, "epoch": 5.11, "learning_rate": 8.529877511213357e-06, "loss": 0.0938, "step": 5377, "task_loss": 0.04220582917332649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795267808750521, "compression_loss": 0.0, "distillation_loss": 0.03112136758863926, "epoch": 5.11, "learning_rate": 8.521862918624265e-06, "loss": 0.0283, "step": 5378, "task_loss": 0.0033892393112182617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952724664187393, "compression_loss": 0.0, "distillation_loss": 0.08223070204257965, "epoch": 5.11, "learning_rate": 8.513851319368124e-06, "loss": 0.0853, "step": 5379, "task_loss": 0.11259977519512177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952771210297419, "compression_loss": 0.0, "distillation_loss": 0.07326701283454895, "epoch": 5.11, "learning_rate": 8.505842714900297e-06, "loss": 0.0902, "step": 5380, "task_loss": 0.24259977042675018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952817725845325, "compression_loss": 0.0, "distillation_loss": 0.03324619308114052, "epoch": 5.11, "learning_rate": 8.497837106675571e-06, "loss": 0.0454, "step": 5381, "task_loss": 0.15457826852798462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952864210841147, "compression_loss": 0.0, "distillation_loss": 0.05445803701877594, "epoch": 5.11, "learning_rate": 8.489834496148217e-06, "loss": 0.0585, "step": 5382, "task_loss": 0.09474760293960571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952910665294924, "compression_loss": 0.0, "distillation_loss": 0.05557304993271828, "epoch": 5.11, "learning_rate": 8.481834884771938e-06, "loss": 0.0507, "step": 5383, "task_loss": 0.006748411804437637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7952957089216692, "compression_loss": 0.0, "distillation_loss": 0.09471587836742401, "epoch": 5.11, "learning_rate": 8.4738382739999e-06, "loss": 0.094, "step": 5384, "task_loss": 0.08728061616420746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953003482616489, "compression_loss": 0.0, "distillation_loss": 0.10454539954662323, "epoch": 5.11, "learning_rate": 8.465844665284739e-06, "loss": 0.1136, "step": 5385, "task_loss": 0.19549594819545746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953049845504347, "compression_loss": 0.0, "distillation_loss": 0.17727123200893402, "epoch": 5.11, "learning_rate": 8.45785406007852e-06, "loss": 0.1673, "step": 5386, "task_loss": 0.07721205800771713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953096177890309, "compression_loss": 0.0, "distillation_loss": 0.02914445474743843, "epoch": 5.12, "learning_rate": 8.449866459832775e-06, "loss": 0.0333, "step": 5387, "task_loss": 0.0706995502114296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953142479784407, "compression_loss": 0.0, "distillation_loss": 0.03571475297212601, "epoch": 5.12, "learning_rate": 8.44188186599849e-06, "loss": 0.0336, "step": 5388, "task_loss": 0.014664966613054276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953188751196681, "compression_loss": 0.0, "distillation_loss": 0.06461668014526367, "epoch": 5.12, "learning_rate": 8.433900280026118e-06, "loss": 0.0607, "step": 5389, "task_loss": 0.0250856913626194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953234992137167, "compression_loss": 0.0, "distillation_loss": 0.09197506308555603, "epoch": 5.12, "learning_rate": 8.425921703365547e-06, "loss": 0.094, "step": 5390, "task_loss": 0.11221229285001755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953281202615902, "compression_loss": 0.0, "distillation_loss": 0.04114022105932236, "epoch": 5.12, "learning_rate": 8.417946137466107e-06, "loss": 0.0377, "step": 5391, "task_loss": 0.006361830979585648 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953327382642921, "compression_loss": 0.0, "distillation_loss": 0.11470133811235428, "epoch": 5.12, "learning_rate": 8.409973583776624e-06, "loss": 0.133, "step": 5392, "task_loss": 0.29758089780807495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953373532228262, "compression_loss": 0.0, "distillation_loss": 0.029153695330023766, "epoch": 5.12, "learning_rate": 8.402004043745335e-06, "loss": 0.0271, "step": 5393, "task_loss": 0.008810054510831833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953419651381962, "compression_loss": 0.0, "distillation_loss": 0.031133677810430527, "epoch": 5.12, "learning_rate": 8.39403751881994e-06, "loss": 0.0376, "step": 5394, "task_loss": 0.09556027501821518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953465740114058, "compression_loss": 0.0, "distillation_loss": 0.08608759194612503, "epoch": 5.12, "learning_rate": 8.386074010447614e-06, "loss": 0.0831, "step": 5395, "task_loss": 0.05571907386183739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953511798434587, "compression_loss": 0.0, "distillation_loss": 0.028766460716724396, "epoch": 5.12, "learning_rate": 8.378113520074949e-06, "loss": 0.0262, "step": 5396, "task_loss": 0.003098204731941223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953557826353584, "compression_loss": 0.0, "distillation_loss": 0.09285937994718552, "epoch": 5.13, "learning_rate": 8.370156049148022e-06, "loss": 0.0864, "step": 5397, "task_loss": 0.027815401554107666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953603823881089, "compression_loss": 0.0, "distillation_loss": 0.07426916807889938, "epoch": 5.13, "learning_rate": 8.362201599112332e-06, "loss": 0.0757, "step": 5398, "task_loss": 0.08855120837688446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953649791027136, "compression_loss": 0.0, "distillation_loss": 0.026602642610669136, "epoch": 5.13, "learning_rate": 8.354250171412859e-06, "loss": 0.0299, "step": 5399, "task_loss": 0.0595041885972023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953695727801764, "compression_loss": 0.0, "distillation_loss": 0.10936440527439117, "epoch": 5.13, "learning_rate": 8.346301767494008e-06, "loss": 0.1146, "step": 5400, "task_loss": 0.16161611676216125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953741634215007, "compression_loss": 0.0, "distillation_loss": 0.021258335560560226, "epoch": 5.13, "learning_rate": 8.338356388799637e-06, "loss": 0.0326, "step": 5401, "task_loss": 0.1344192773103714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953787510276905, "compression_loss": 0.0, "distillation_loss": 0.04099886491894722, "epoch": 5.13, "learning_rate": 8.330414036773082e-06, "loss": 0.0412, "step": 5402, "task_loss": 0.04298762232065201 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953833355997493, "compression_loss": 0.0, "distillation_loss": 0.07252480089664459, "epoch": 5.13, "learning_rate": 8.322474712857095e-06, "loss": 0.0676, "step": 5403, "task_loss": 0.023213542997837067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953879171386807, "compression_loss": 0.0, "distillation_loss": 0.023728784173727036, "epoch": 5.13, "learning_rate": 8.314538418493892e-06, "loss": 0.0218, "step": 5404, "task_loss": 0.004381675273180008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953924956454886, "compression_loss": 0.0, "distillation_loss": 0.05856727808713913, "epoch": 5.13, "learning_rate": 8.306605155125141e-06, "loss": 0.0642, "step": 5405, "task_loss": 0.11514223366975784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7953970711211765, "compression_loss": 0.0, "distillation_loss": 0.022047195583581924, "epoch": 5.13, "learning_rate": 8.298674924191968e-06, "loss": 0.0206, "step": 5406, "task_loss": 0.0073068346828222275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954016435667483, "compression_loss": 0.0, "distillation_loss": 0.02194095030426979, "epoch": 5.13, "learning_rate": 8.29074772713493e-06, "loss": 0.0201, "step": 5407, "task_loss": 0.0031332336366176605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954062129832075, "compression_loss": 0.0, "distillation_loss": 0.03532099723815918, "epoch": 5.14, "learning_rate": 8.282823565394032e-06, "loss": 0.0414, "step": 5408, "task_loss": 0.09565601497888565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954107793715578, "compression_loss": 0.0, "distillation_loss": 0.12141124904155731, "epoch": 5.14, "learning_rate": 8.274902440408755e-06, "loss": 0.125, "step": 5409, "task_loss": 0.1574663519859314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954153427328029, "compression_loss": 0.0, "distillation_loss": 0.0844094529747963, "epoch": 5.14, "learning_rate": 8.266984353617995e-06, "loss": 0.092, "step": 5410, "task_loss": 0.16067147254943848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954199030679464, "compression_loss": 0.0, "distillation_loss": 0.07113252580165863, "epoch": 5.14, "learning_rate": 8.259069306460107e-06, "loss": 0.0768, "step": 5411, "task_loss": 0.12745647132396698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954244603779922, "compression_loss": 0.0, "distillation_loss": 0.12387774884700775, "epoch": 5.14, "learning_rate": 8.251157300372913e-06, "loss": 0.1206, "step": 5412, "task_loss": 0.09093039482831955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954290146639438, "compression_loss": 0.0, "distillation_loss": 0.12096986174583435, "epoch": 5.14, "learning_rate": 8.243248336793658e-06, "loss": 0.1201, "step": 5413, "task_loss": 0.11250494420528412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954335659268049, "compression_loss": 0.0, "distillation_loss": 0.09077440202236176, "epoch": 5.14, "learning_rate": 8.235342417159036e-06, "loss": 0.0899, "step": 5414, "task_loss": 0.08181983232498169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954381141675794, "compression_loss": 0.0, "distillation_loss": 0.08009003102779388, "epoch": 5.14, "learning_rate": 8.227439542905205e-06, "loss": 0.0828, "step": 5415, "task_loss": 0.10733769834041595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954426593872705, "compression_loss": 0.0, "distillation_loss": 0.05436462163925171, "epoch": 5.14, "learning_rate": 8.219539715467766e-06, "loss": 0.0544, "step": 5416, "task_loss": 0.05464364215731621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954472015868824, "compression_loss": 0.0, "distillation_loss": 0.029182853177189827, "epoch": 5.14, "learning_rate": 8.211642936281752e-06, "loss": 0.0272, "step": 5417, "task_loss": 0.00951925665140152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954517407674185, "compression_loss": 0.0, "distillation_loss": 0.027566958218812943, "epoch": 5.15, "learning_rate": 8.203749206781647e-06, "loss": 0.0264, "step": 5418, "task_loss": 0.015928007662296295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954562769298826, "compression_loss": 0.0, "distillation_loss": 0.17319881916046143, "epoch": 5.15, "learning_rate": 8.195858528401396e-06, "loss": 0.1678, "step": 5419, "task_loss": 0.11959183216094971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954608100752782, "compression_loss": 0.0, "distillation_loss": 0.012766760773956776, "epoch": 5.15, "learning_rate": 8.187970902574371e-06, "loss": 0.0118, "step": 5420, "task_loss": 0.002884853631258011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954653402046092, "compression_loss": 0.0, "distillation_loss": 0.1071738451719284, "epoch": 5.15, "learning_rate": 8.180086330733403e-06, "loss": 0.1129, "step": 5421, "task_loss": 0.16459250450134277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954698673188793, "compression_loss": 0.0, "distillation_loss": 0.06729540228843689, "epoch": 5.15, "learning_rate": 8.172204814310742e-06, "loss": 0.0673, "step": 5422, "task_loss": 0.06722187995910645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954743914190919, "compression_loss": 0.0, "distillation_loss": 0.041276026517152786, "epoch": 5.15, "learning_rate": 8.164326354738134e-06, "loss": 0.0452, "step": 5423, "task_loss": 0.0802101194858551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954789125062509, "compression_loss": 0.0, "distillation_loss": 0.04235687106847763, "epoch": 5.15, "learning_rate": 8.156450953446725e-06, "loss": 0.0563, "step": 5424, "task_loss": 0.18132589757442474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954834305813601, "compression_loss": 0.0, "distillation_loss": 0.041152555495500565, "epoch": 5.15, "learning_rate": 8.148578611867114e-06, "loss": 0.051, "step": 5425, "task_loss": 0.13986477255821228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954879456454228, "compression_loss": 0.0, "distillation_loss": 0.034636110067367554, "epoch": 5.15, "learning_rate": 8.14070933142936e-06, "loss": 0.0315, "step": 5426, "task_loss": 0.003546604886651039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795492457699443, "compression_loss": 0.0, "distillation_loss": 0.040368109941482544, "epoch": 5.15, "learning_rate": 8.132843113562954e-06, "loss": 0.0446, "step": 5427, "task_loss": 0.08240634202957153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7954969667444243, "compression_loss": 0.0, "distillation_loss": 0.028532054275274277, "epoch": 5.15, "learning_rate": 8.12497995969683e-06, "loss": 0.0265, "step": 5428, "task_loss": 0.008003626018762589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955014727813703, "compression_loss": 0.0, "distillation_loss": 0.019893258810043335, "epoch": 5.16, "learning_rate": 8.11711987125936e-06, "loss": 0.019, "step": 5429, "task_loss": 0.01055414229631424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955059758112848, "compression_loss": 0.0, "distillation_loss": 0.06559453159570694, "epoch": 5.16, "learning_rate": 8.109262849678378e-06, "loss": 0.066, "step": 5430, "task_loss": 0.06944157928228378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955104758351714, "compression_loss": 0.0, "distillation_loss": 0.07780923694372177, "epoch": 5.16, "learning_rate": 8.101408896381141e-06, "loss": 0.0707, "step": 5431, "task_loss": 0.006445575505495071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955149728540338, "compression_loss": 0.0, "distillation_loss": 0.049274250864982605, "epoch": 5.16, "learning_rate": 8.093558012794363e-06, "loss": 0.0533, "step": 5432, "task_loss": 0.08973647654056549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955194668688758, "compression_loss": 0.0, "distillation_loss": 0.038362838327884674, "epoch": 5.16, "learning_rate": 8.085710200344202e-06, "loss": 0.0464, "step": 5433, "task_loss": 0.1189565435051918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955239578807008, "compression_loss": 0.0, "distillation_loss": 0.14410632848739624, "epoch": 5.16, "learning_rate": 8.077865460456241e-06, "loss": 0.1396, "step": 5434, "task_loss": 0.09874996542930603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955284458905129, "compression_loss": 0.0, "distillation_loss": 0.019637832418084145, "epoch": 5.16, "learning_rate": 8.070023794555521e-06, "loss": 0.0285, "step": 5435, "task_loss": 0.1084957867860794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955329308993153, "compression_loss": 0.0, "distillation_loss": 0.022479506209492683, "epoch": 5.16, "learning_rate": 8.062185204066503e-06, "loss": 0.0287, "step": 5436, "task_loss": 0.08433564752340317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955374129081121, "compression_loss": 0.0, "distillation_loss": 0.018630899488925934, "epoch": 5.16, "learning_rate": 8.054349690413124e-06, "loss": 0.0186, "step": 5437, "task_loss": 0.018421385437250137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955418919179067, "compression_loss": 0.0, "distillation_loss": 0.1612919569015503, "epoch": 5.16, "learning_rate": 8.046517255018737e-06, "loss": 0.1564, "step": 5438, "task_loss": 0.11284254491329193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955463679297029, "compression_loss": 0.0, "distillation_loss": 0.02024524286389351, "epoch": 5.17, "learning_rate": 8.03868789930613e-06, "loss": 0.0189, "step": 5439, "task_loss": 0.006963614374399185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955508409445043, "compression_loss": 0.0, "distillation_loss": 0.08255909383296967, "epoch": 5.17, "learning_rate": 8.030861624697552e-06, "loss": 0.0821, "step": 5440, "task_loss": 0.07832753658294678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955553109633149, "compression_loss": 0.0, "distillation_loss": 0.03200299292802811, "epoch": 5.17, "learning_rate": 8.023038432614693e-06, "loss": 0.0304, "step": 5441, "task_loss": 0.01600661687552929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795559777987138, "compression_loss": 0.0, "distillation_loss": 0.044372882694005966, "epoch": 5.17, "learning_rate": 8.015218324478666e-06, "loss": 0.0583, "step": 5442, "task_loss": 0.18402326107025146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955642420169773, "compression_loss": 0.0, "distillation_loss": 0.03316294774413109, "epoch": 5.17, "learning_rate": 8.007401301710022e-06, "loss": 0.0323, "step": 5443, "task_loss": 0.02459472045302391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955687030538368, "compression_loss": 0.0, "distillation_loss": 0.04046769440174103, "epoch": 5.17, "learning_rate": 7.999587365728776e-06, "loss": 0.0457, "step": 5444, "task_loss": 0.09298967570066452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955731610987198, "compression_loss": 0.0, "distillation_loss": 0.03207171708345413, "epoch": 5.17, "learning_rate": 7.991776517954359e-06, "loss": 0.0461, "step": 5445, "task_loss": 0.17218343913555145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955776161526302, "compression_loss": 0.0, "distillation_loss": 0.05809757858514786, "epoch": 5.17, "learning_rate": 7.983968759805641e-06, "loss": 0.0545, "step": 5446, "task_loss": 0.022373545914888382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955820682165717, "compression_loss": 0.0, "distillation_loss": 0.03011673502624035, "epoch": 5.17, "learning_rate": 7.976164092700958e-06, "loss": 0.0321, "step": 5447, "task_loss": 0.050249215215444565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795586517291548, "compression_loss": 0.0, "distillation_loss": 0.08846057951450348, "epoch": 5.17, "learning_rate": 7.968362518058048e-06, "loss": 0.085, "step": 5448, "task_loss": 0.05429108440876007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955909633785625, "compression_loss": 0.0, "distillation_loss": 0.2117672562599182, "epoch": 5.17, "learning_rate": 7.960564037294118e-06, "loss": 0.2183, "step": 5449, "task_loss": 0.276851087808609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955954064786193, "compression_loss": 0.0, "distillation_loss": 0.06762327998876572, "epoch": 5.18, "learning_rate": 7.952768651825784e-06, "loss": 0.0656, "step": 5450, "task_loss": 0.04782693088054657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7955998465927218, "compression_loss": 0.0, "distillation_loss": 0.0332944318652153, "epoch": 5.18, "learning_rate": 7.944976363069137e-06, "loss": 0.0315, "step": 5451, "task_loss": 0.015227574855089188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956042837218736, "compression_loss": 0.0, "distillation_loss": 0.15575581789016724, "epoch": 5.18, "learning_rate": 7.937187172439669e-06, "loss": 0.1656, "step": 5452, "task_loss": 0.25407275557518005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956087178670788, "compression_loss": 0.0, "distillation_loss": 0.04357504844665527, "epoch": 5.18, "learning_rate": 7.929401081352319e-06, "loss": 0.0438, "step": 5453, "task_loss": 0.045449040830135345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956131490293407, "compression_loss": 0.0, "distillation_loss": 0.0458906814455986, "epoch": 5.18, "learning_rate": 7.921618091221484e-06, "loss": 0.047, "step": 5454, "task_loss": 0.05692676454782486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956175772096631, "compression_loss": 0.0, "distillation_loss": 0.023164385929703712, "epoch": 5.18, "learning_rate": 7.91383820346097e-06, "loss": 0.0319, "step": 5455, "task_loss": 0.1104351356625557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956220024090497, "compression_loss": 0.0, "distillation_loss": 0.04113226756453514, "epoch": 5.18, "learning_rate": 7.906061419484028e-06, "loss": 0.0444, "step": 5456, "task_loss": 0.07416713237762451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956264246285041, "compression_loss": 0.0, "distillation_loss": 0.043819695711135864, "epoch": 5.18, "learning_rate": 7.898287740703356e-06, "loss": 0.0427, "step": 5457, "task_loss": 0.03260737285017967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79563084386903, "compression_loss": 0.0, "distillation_loss": 0.028930241242051125, "epoch": 5.18, "learning_rate": 7.890517168531086e-06, "loss": 0.0365, "step": 5458, "task_loss": 0.1042308360338211 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956352601316313, "compression_loss": 0.0, "distillation_loss": 0.20450790226459503, "epoch": 5.18, "learning_rate": 7.882749704378773e-06, "loss": 0.1955, "step": 5459, "task_loss": 0.11405302584171295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956396734173115, "compression_loss": 0.0, "distillation_loss": 0.048243820667266846, "epoch": 5.19, "learning_rate": 7.874985349657405e-06, "loss": 0.0511, "step": 5460, "task_loss": 0.07671748101711273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956440837270742, "compression_loss": 0.0, "distillation_loss": 0.012140437960624695, "epoch": 5.19, "learning_rate": 7.86722410577743e-06, "loss": 0.0113, "step": 5461, "task_loss": 0.003828030079603195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956484910619231, "compression_loss": 0.0, "distillation_loss": 0.02375885099172592, "epoch": 5.19, "learning_rate": 7.85946597414871e-06, "loss": 0.0221, "step": 5462, "task_loss": 0.006828447803854942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956528954228621, "compression_loss": 0.0, "distillation_loss": 0.07895895093679428, "epoch": 5.19, "learning_rate": 7.85171095618054e-06, "loss": 0.0807, "step": 5463, "task_loss": 0.0966450572013855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956572968108947, "compression_loss": 0.0, "distillation_loss": 0.025494446977972984, "epoch": 5.19, "learning_rate": 7.843959053281663e-06, "loss": 0.0237, "step": 5464, "task_loss": 0.007278071716427803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956616952270247, "compression_loss": 0.0, "distillation_loss": 0.020559193566441536, "epoch": 5.19, "learning_rate": 7.836210266860253e-06, "loss": 0.0241, "step": 5465, "task_loss": 0.05645029619336128 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956660906722556, "compression_loss": 0.0, "distillation_loss": 0.0954541563987732, "epoch": 5.19, "learning_rate": 7.8284645983239e-06, "loss": 0.0991, "step": 5466, "task_loss": 0.1314251720905304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956704831475913, "compression_loss": 0.0, "distillation_loss": 0.047063831239938736, "epoch": 5.19, "learning_rate": 7.820722049079653e-06, "loss": 0.0666, "step": 5467, "task_loss": 0.24207653105258942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956748726540352, "compression_loss": 0.0, "distillation_loss": 0.04180522635579109, "epoch": 5.19, "learning_rate": 7.812982620533993e-06, "loss": 0.0509, "step": 5468, "task_loss": 0.13276955485343933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956792591925913, "compression_loss": 0.0, "distillation_loss": 0.01592065393924713, "epoch": 5.19, "learning_rate": 7.805246314092809e-06, "loss": 0.0236, "step": 5469, "task_loss": 0.09271353483200073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956836427642631, "compression_loss": 0.0, "distillation_loss": 0.032307371497154236, "epoch": 5.19, "learning_rate": 7.797513131161437e-06, "loss": 0.0293, "step": 5470, "task_loss": 0.002665068954229355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956880233700542, "compression_loss": 0.0, "distillation_loss": 0.042782217264175415, "epoch": 5.2, "learning_rate": 7.78978307314466e-06, "loss": 0.0452, "step": 5471, "task_loss": 0.06668760627508163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956924010109685, "compression_loss": 0.0, "distillation_loss": 0.02939591184258461, "epoch": 5.2, "learning_rate": 7.782056141446673e-06, "loss": 0.0269, "step": 5472, "task_loss": 0.003960005939006805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7956967756880096, "compression_loss": 0.0, "distillation_loss": 0.13202489912509918, "epoch": 5.2, "learning_rate": 7.774332337471101e-06, "loss": 0.136, "step": 5473, "task_loss": 0.17181508243083954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957011474021811, "compression_loss": 0.0, "distillation_loss": 0.02327493578195572, "epoch": 5.2, "learning_rate": 7.766611662621023e-06, "loss": 0.0219, "step": 5474, "task_loss": 0.009945593774318695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957055161544868, "compression_loss": 0.0, "distillation_loss": 0.05180184915661812, "epoch": 5.2, "learning_rate": 7.75889411829894e-06, "loss": 0.0526, "step": 5475, "task_loss": 0.05985014885663986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957098819459303, "compression_loss": 0.0, "distillation_loss": 0.09386663138866425, "epoch": 5.2, "learning_rate": 7.751179705906774e-06, "loss": 0.0984, "step": 5476, "task_loss": 0.13911299407482147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957142447775153, "compression_loss": 0.0, "distillation_loss": 0.060429397970438004, "epoch": 5.2, "learning_rate": 7.74346842684588e-06, "loss": 0.0566, "step": 5477, "task_loss": 0.022498732432723045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957186046502455, "compression_loss": 0.0, "distillation_loss": 0.02577110007405281, "epoch": 5.2, "learning_rate": 7.73576028251706e-06, "loss": 0.0237, "step": 5478, "task_loss": 0.005201183259487152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957229615651246, "compression_loss": 0.0, "distillation_loss": 0.04601401090621948, "epoch": 5.2, "learning_rate": 7.728055274320527e-06, "loss": 0.0437, "step": 5479, "task_loss": 0.02243119105696678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957273155231561, "compression_loss": 0.0, "distillation_loss": 0.11642314493656158, "epoch": 5.2, "learning_rate": 7.720353403655934e-06, "loss": 0.1139, "step": 5480, "task_loss": 0.09131792187690735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795731666525344, "compression_loss": 0.0, "distillation_loss": 0.06120690703392029, "epoch": 5.21, "learning_rate": 7.712654671922371e-06, "loss": 0.0697, "step": 5481, "task_loss": 0.14600446820259094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957360145726917, "compression_loss": 0.0, "distillation_loss": 0.014658894389867783, "epoch": 5.21, "learning_rate": 7.704959080518343e-06, "loss": 0.0295, "step": 5482, "task_loss": 0.16297921538352966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957403596662032, "compression_loss": 0.0, "distillation_loss": 0.05037935450673103, "epoch": 5.21, "learning_rate": 7.697266630841784e-06, "loss": 0.0564, "step": 5483, "task_loss": 0.1106264591217041 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957447018068818, "compression_loss": 0.0, "distillation_loss": 0.02987675368785858, "epoch": 5.21, "learning_rate": 7.689577324290073e-06, "loss": 0.0273, "step": 5484, "task_loss": 0.004036994650959969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957490409957314, "compression_loss": 0.0, "distillation_loss": 0.06058541685342789, "epoch": 5.21, "learning_rate": 7.681891162260015e-06, "loss": 0.061, "step": 5485, "task_loss": 0.06461737304925919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957533772337557, "compression_loss": 0.0, "distillation_loss": 0.024111486971378326, "epoch": 5.21, "learning_rate": 7.674208146147834e-06, "loss": 0.0267, "step": 5486, "task_loss": 0.04959327355027199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957577105219583, "compression_loss": 0.0, "distillation_loss": 0.058419279754161835, "epoch": 5.21, "learning_rate": 7.666528277349177e-06, "loss": 0.0589, "step": 5487, "task_loss": 0.06302666664123535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957620408613428, "compression_loss": 0.0, "distillation_loss": 0.04451620578765869, "epoch": 5.21, "learning_rate": 7.658851557259144e-06, "loss": 0.0585, "step": 5488, "task_loss": 0.18458762764930725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957663682529131, "compression_loss": 0.0, "distillation_loss": 0.047050587832927704, "epoch": 5.21, "learning_rate": 7.651177987272243e-06, "loss": 0.0492, "step": 5489, "task_loss": 0.06901868432760239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957706926976729, "compression_loss": 0.0, "distillation_loss": 0.03578869253396988, "epoch": 5.21, "learning_rate": 7.643507568782407e-06, "loss": 0.033, "step": 5490, "task_loss": 0.007528049871325493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957750141966256, "compression_loss": 0.0, "distillation_loss": 0.04843487590551376, "epoch": 5.21, "learning_rate": 7.635840303183018e-06, "loss": 0.0525, "step": 5491, "task_loss": 0.08893577009439468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957793327507751, "compression_loss": 0.0, "distillation_loss": 0.014655525796115398, "epoch": 5.22, "learning_rate": 7.628176191866853e-06, "loss": 0.0138, "step": 5492, "task_loss": 0.005986766889691353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957836483611249, "compression_loss": 0.0, "distillation_loss": 0.036418359726667404, "epoch": 5.22, "learning_rate": 7.6205152362261586e-06, "loss": 0.0488, "step": 5493, "task_loss": 0.1600310057401657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957879610286789, "compression_loss": 0.0, "distillation_loss": 0.031819455325603485, "epoch": 5.22, "learning_rate": 7.612857437652563e-06, "loss": 0.0348, "step": 5494, "task_loss": 0.06123275309801102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957922707544407, "compression_loss": 0.0, "distillation_loss": 0.0890742614865303, "epoch": 5.22, "learning_rate": 7.605202797537156e-06, "loss": 0.0832, "step": 5495, "task_loss": 0.03046075440943241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7957965775394139, "compression_loss": 0.0, "distillation_loss": 0.06095193326473236, "epoch": 5.22, "learning_rate": 7.5975513172704375e-06, "loss": 0.0706, "step": 5496, "task_loss": 0.15771833062171936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958008813846024, "compression_loss": 0.0, "distillation_loss": 0.04849329590797424, "epoch": 5.22, "learning_rate": 7.589902998242327e-06, "loss": 0.0612, "step": 5497, "task_loss": 0.17582949995994568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958051822910096, "compression_loss": 0.0, "distillation_loss": 0.08853879570960999, "epoch": 5.22, "learning_rate": 7.582257841842191e-06, "loss": 0.0895, "step": 5498, "task_loss": 0.09831196069717407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958094802596393, "compression_loss": 0.0, "distillation_loss": 0.029421133920550346, "epoch": 5.22, "learning_rate": 7.574615849458805e-06, "loss": 0.0488, "step": 5499, "task_loss": 0.22320178151130676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958137752914952, "compression_loss": 0.0, "distillation_loss": 0.10746181011199951, "epoch": 5.22, "learning_rate": 7.56697702248036e-06, "loss": 0.1032, "step": 5500, "task_loss": 0.06493166834115982 }, { "epoch": 5.22, "eval_accuracy": 0.8795871559633027, "eval_loss": 0.5020073056221008, "eval_runtime": 18.0294, "eval_samples_per_second": 48.365, "eval_steps_per_second": 6.046, "step": 5500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958180673875811, "compression_loss": 0.0, "distillation_loss": 0.07600829005241394, "epoch": 5.22, "learning_rate": 7.5593413622945e-06, "loss": 0.0773, "step": 5501, "task_loss": 0.08857627213001251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958223565489004, "compression_loss": 0.0, "distillation_loss": 0.06297820806503296, "epoch": 5.23, "learning_rate": 7.551708870288282e-06, "loss": 0.0582, "step": 5502, "task_loss": 0.0155994463711977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958266427764571, "compression_loss": 0.0, "distillation_loss": 0.045012425631284714, "epoch": 5.23, "learning_rate": 7.5440795478481815e-06, "loss": 0.0409, "step": 5503, "task_loss": 0.0038008801639080048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958309260712546, "compression_loss": 0.0, "distillation_loss": 0.02455669268965721, "epoch": 5.23, "learning_rate": 7.536453396360091e-06, "loss": 0.025, "step": 5504, "task_loss": 0.0289287306368351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958352064342967, "compression_loss": 0.0, "distillation_loss": 0.082015261054039, "epoch": 5.23, "learning_rate": 7.528830417209356e-06, "loss": 0.0851, "step": 5505, "task_loss": 0.11327110230922699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958394838665872, "compression_loss": 0.0, "distillation_loss": 0.025041064247488976, "epoch": 5.23, "learning_rate": 7.521210611780716e-06, "loss": 0.024, "step": 5506, "task_loss": 0.014456404373049736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958437583691296, "compression_loss": 0.0, "distillation_loss": 0.1386338621377945, "epoch": 5.23, "learning_rate": 7.513593981458347e-06, "loss": 0.1412, "step": 5507, "task_loss": 0.1647292524576187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958480299429276, "compression_loss": 0.0, "distillation_loss": 0.11838963627815247, "epoch": 5.23, "learning_rate": 7.505980527625841e-06, "loss": 0.1125, "step": 5508, "task_loss": 0.0594179704785347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795852298588985, "compression_loss": 0.0, "distillation_loss": 0.035721536725759506, "epoch": 5.23, "learning_rate": 7.498370251666223e-06, "loss": 0.0433, "step": 5509, "task_loss": 0.1113739162683487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958565643083054, "compression_loss": 0.0, "distillation_loss": 0.04196429252624512, "epoch": 5.23, "learning_rate": 7.490763154961944e-06, "loss": 0.0402, "step": 5510, "task_loss": 0.02450537495315075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958608271018925, "compression_loss": 0.0, "distillation_loss": 0.15733975172042847, "epoch": 5.23, "learning_rate": 7.483159238894858e-06, "loss": 0.156, "step": 5511, "task_loss": 0.14443063735961914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79586508697075, "compression_loss": 0.0, "distillation_loss": 0.021348509937524796, "epoch": 5.23, "learning_rate": 7.475558504846264e-06, "loss": 0.0288, "step": 5512, "task_loss": 0.09601381421089172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958693439158816, "compression_loss": 0.0, "distillation_loss": 0.03914246708154678, "epoch": 5.24, "learning_rate": 7.467960954196867e-06, "loss": 0.0441, "step": 5513, "task_loss": 0.08865661174058914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958735979382908, "compression_loss": 0.0, "distillation_loss": 0.1477883756160736, "epoch": 5.24, "learning_rate": 7.460366588326797e-06, "loss": 0.1582, "step": 5514, "task_loss": 0.25182464718818665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958778490389816, "compression_loss": 0.0, "distillation_loss": 0.041810937225818634, "epoch": 5.24, "learning_rate": 7.452775408615603e-06, "loss": 0.0481, "step": 5515, "task_loss": 0.10518115013837814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958820972189574, "compression_loss": 0.0, "distillation_loss": 0.01617216318845749, "epoch": 5.24, "learning_rate": 7.44518741644227e-06, "loss": 0.0226, "step": 5516, "task_loss": 0.07998733222484589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958863424792221, "compression_loss": 0.0, "distillation_loss": 0.05367042124271393, "epoch": 5.24, "learning_rate": 7.4376026131851875e-06, "loss": 0.0558, "step": 5517, "task_loss": 0.07507097721099854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958905848207791, "compression_loss": 0.0, "distillation_loss": 0.07538196444511414, "epoch": 5.24, "learning_rate": 7.430021000222156e-06, "loss": 0.0736, "step": 5518, "task_loss": 0.05794493108987808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958948242446324, "compression_loss": 0.0, "distillation_loss": 0.09253949671983719, "epoch": 5.24, "learning_rate": 7.422442578930444e-06, "loss": 0.0853, "step": 5519, "task_loss": 0.019849814474582672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7958990607517855, "compression_loss": 0.0, "distillation_loss": 0.016655316576361656, "epoch": 5.24, "learning_rate": 7.414867350686694e-06, "loss": 0.0227, "step": 5520, "task_loss": 0.07758677005767822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959032943432421, "compression_loss": 0.0, "distillation_loss": 0.06732715666294098, "epoch": 5.24, "learning_rate": 7.407295316866977e-06, "loss": 0.0669, "step": 5521, "task_loss": 0.06263066828250885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795907525020006, "compression_loss": 0.0, "distillation_loss": 0.10630275309085846, "epoch": 5.24, "learning_rate": 7.399726478846788e-06, "loss": 0.1214, "step": 5522, "task_loss": 0.2570263147354126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959117527830806, "compression_loss": 0.0, "distillation_loss": 0.05409185588359833, "epoch": 5.25, "learning_rate": 7.392160838001055e-06, "loss": 0.0691, "step": 5523, "task_loss": 0.20388929545879364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959159776334699, "compression_loss": 0.0, "distillation_loss": 0.04524946212768555, "epoch": 5.25, "learning_rate": 7.384598395704106e-06, "loss": 0.0479, "step": 5524, "task_loss": 0.0716903880238533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959201995721774, "compression_loss": 0.0, "distillation_loss": 0.2098562866449356, "epoch": 5.25, "learning_rate": 7.377039153329687e-06, "loss": 0.2018, "step": 5525, "task_loss": 0.12896330654621124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959244186002068, "compression_loss": 0.0, "distillation_loss": 0.037853680551052094, "epoch": 5.25, "learning_rate": 7.36948311225098e-06, "loss": 0.0462, "step": 5526, "task_loss": 0.12155872583389282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959286347185619, "compression_loss": 0.0, "distillation_loss": 0.018217723816633224, "epoch": 5.25, "learning_rate": 7.361930273840581e-06, "loss": 0.0254, "step": 5527, "task_loss": 0.09013350307941437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959328479282463, "compression_loss": 0.0, "distillation_loss": 0.07805530726909637, "epoch": 5.25, "learning_rate": 7.3543806394704955e-06, "loss": 0.0762, "step": 5528, "task_loss": 0.05900496616959572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959370582302636, "compression_loss": 0.0, "distillation_loss": 0.07341839373111725, "epoch": 5.25, "learning_rate": 7.346834210512138e-06, "loss": 0.0724, "step": 5529, "task_loss": 0.06279435008764267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959412656256175, "compression_loss": 0.0, "distillation_loss": 0.02832254022359848, "epoch": 5.25, "learning_rate": 7.3392909883363755e-06, "loss": 0.0378, "step": 5530, "task_loss": 0.12311002612113953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959454701153119, "compression_loss": 0.0, "distillation_loss": 0.04221939295530319, "epoch": 5.25, "learning_rate": 7.331750974313459e-06, "loss": 0.0418, "step": 5531, "task_loss": 0.03794408589601517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959496717003502, "compression_loss": 0.0, "distillation_loss": 0.11199971288442612, "epoch": 5.25, "learning_rate": 7.324214169813062e-06, "loss": 0.108, "step": 5532, "task_loss": 0.07245731353759766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959538703817363, "compression_loss": 0.0, "distillation_loss": 0.026987843215465546, "epoch": 5.25, "learning_rate": 7.316680576204296e-06, "loss": 0.0251, "step": 5533, "task_loss": 0.00792054831981659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959580661604737, "compression_loss": 0.0, "distillation_loss": 0.049572162330150604, "epoch": 5.26, "learning_rate": 7.309150194855668e-06, "loss": 0.0507, "step": 5534, "task_loss": 0.06096307933330536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959622590375662, "compression_loss": 0.0, "distillation_loss": 0.022030137479305267, "epoch": 5.26, "learning_rate": 7.301623027135099e-06, "loss": 0.0206, "step": 5535, "task_loss": 0.008088313043117523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959664490140175, "compression_loss": 0.0, "distillation_loss": 0.2133478969335556, "epoch": 5.26, "learning_rate": 7.294099074409944e-06, "loss": 0.2018, "step": 5536, "task_loss": 0.09802666306495667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959706360908312, "compression_loss": 0.0, "distillation_loss": 0.020365485921502113, "epoch": 5.26, "learning_rate": 7.28657833804697e-06, "loss": 0.0194, "step": 5537, "task_loss": 0.01060546562075615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795974820269011, "compression_loss": 0.0, "distillation_loss": 0.04007776081562042, "epoch": 5.26, "learning_rate": 7.279060819412351e-06, "loss": 0.0394, "step": 5538, "task_loss": 0.03316129371523857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959790015495606, "compression_loss": 0.0, "distillation_loss": 0.03753858804702759, "epoch": 5.26, "learning_rate": 7.271546519871672e-06, "loss": 0.0461, "step": 5539, "task_loss": 0.12282435595989227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959831799334837, "compression_loss": 0.0, "distillation_loss": 0.029383216053247452, "epoch": 5.26, "learning_rate": 7.264035440789954e-06, "loss": 0.0275, "step": 5540, "task_loss": 0.010303637012839317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959873554217839, "compression_loss": 0.0, "distillation_loss": 0.06451848894357681, "epoch": 5.26, "learning_rate": 7.25652758353162e-06, "loss": 0.0609, "step": 5541, "task_loss": 0.02830340340733528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.795991528015465, "compression_loss": 0.0, "distillation_loss": 0.07822129875421524, "epoch": 5.26, "learning_rate": 7.249022949460493e-06, "loss": 0.0823, "step": 5542, "task_loss": 0.1194855198264122 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959956977155306, "compression_loss": 0.0, "distillation_loss": 0.11556608974933624, "epoch": 5.26, "learning_rate": 7.2415215399398435e-06, "loss": 0.1179, "step": 5543, "task_loss": 0.13891972601413727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7959998645229844, "compression_loss": 0.0, "distillation_loss": 0.017915979027748108, "epoch": 5.26, "learning_rate": 7.2340233563323284e-06, "loss": 0.0175, "step": 5544, "task_loss": 0.014106318354606628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960040284388301, "compression_loss": 0.0, "distillation_loss": 0.05048777535557747, "epoch": 5.27, "learning_rate": 7.226528400000038e-06, "loss": 0.0469, "step": 5545, "task_loss": 0.014579741284251213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960081894640715, "compression_loss": 0.0, "distillation_loss": 0.02436014637351036, "epoch": 5.27, "learning_rate": 7.219036672304452e-06, "loss": 0.0269, "step": 5546, "task_loss": 0.04929090291261673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796012347599712, "compression_loss": 0.0, "distillation_loss": 0.1192215234041214, "epoch": 5.27, "learning_rate": 7.2115481746065e-06, "loss": 0.1251, "step": 5547, "task_loss": 0.17761507630348206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960165028467554, "compression_loss": 0.0, "distillation_loss": 0.05981665849685669, "epoch": 5.27, "learning_rate": 7.20406290826649e-06, "loss": 0.0593, "step": 5548, "task_loss": 0.054768696427345276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960206552062056, "compression_loss": 0.0, "distillation_loss": 0.07405254989862442, "epoch": 5.27, "learning_rate": 7.196580874644151e-06, "loss": 0.079, "step": 5549, "task_loss": 0.12388560175895691 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960248046790659, "compression_loss": 0.0, "distillation_loss": 0.0403749905526638, "epoch": 5.27, "learning_rate": 7.1891020750986475e-06, "loss": 0.0485, "step": 5550, "task_loss": 0.12145104259252548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960289512663403, "compression_loss": 0.0, "distillation_loss": 0.02248249761760235, "epoch": 5.27, "learning_rate": 7.181626510988529e-06, "loss": 0.0207, "step": 5551, "task_loss": 0.004492869600653648 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960330949690324, "compression_loss": 0.0, "distillation_loss": 0.07457023113965988, "epoch": 5.27, "learning_rate": 7.174154183671763e-06, "loss": 0.082, "step": 5552, "task_loss": 0.148670956492424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960372357881458, "compression_loss": 0.0, "distillation_loss": 0.06673236936330795, "epoch": 5.27, "learning_rate": 7.166685094505737e-06, "loss": 0.075, "step": 5553, "task_loss": 0.14950726926326752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960413737246842, "compression_loss": 0.0, "distillation_loss": 0.036908023059368134, "epoch": 5.27, "learning_rate": 7.15921924484726e-06, "loss": 0.052, "step": 5554, "task_loss": 0.18809108436107635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960455087796513, "compression_loss": 0.0, "distillation_loss": 0.021199818700551987, "epoch": 5.28, "learning_rate": 7.1517566360525284e-06, "loss": 0.0293, "step": 5555, "task_loss": 0.10172471404075623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960496409540508, "compression_loss": 0.0, "distillation_loss": 0.10079994797706604, "epoch": 5.28, "learning_rate": 7.1442972694771545e-06, "loss": 0.0973, "step": 5556, "task_loss": 0.06552006304264069 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960537702488863, "compression_loss": 0.0, "distillation_loss": 0.0728619322180748, "epoch": 5.28, "learning_rate": 7.136841146476181e-06, "loss": 0.0749, "step": 5557, "task_loss": 0.09358760714530945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960578966651618, "compression_loss": 0.0, "distillation_loss": 0.026422590017318726, "epoch": 5.28, "learning_rate": 7.129388268404047e-06, "loss": 0.0249, "step": 5558, "task_loss": 0.011311305686831474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960620202038805, "compression_loss": 0.0, "distillation_loss": 0.04695506393909454, "epoch": 5.28, "learning_rate": 7.121938636614589e-06, "loss": 0.0484, "step": 5559, "task_loss": 0.06093965470790863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960661408660464, "compression_loss": 0.0, "distillation_loss": 0.02747797593474388, "epoch": 5.28, "learning_rate": 7.114492252461089e-06, "loss": 0.0339, "step": 5560, "task_loss": 0.09208115935325623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960702586526632, "compression_loss": 0.0, "distillation_loss": 0.03135443478822708, "epoch": 5.28, "learning_rate": 7.1070491172962e-06, "loss": 0.0301, "step": 5561, "task_loss": 0.01891678385436535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960743735647344, "compression_loss": 0.0, "distillation_loss": 0.060153115540742874, "epoch": 5.28, "learning_rate": 7.099609232472021e-06, "loss": 0.0603, "step": 5562, "task_loss": 0.06156299635767937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960784856032638, "compression_loss": 0.0, "distillation_loss": 0.02115842141211033, "epoch": 5.28, "learning_rate": 7.092172599340024e-06, "loss": 0.0236, "step": 5563, "task_loss": 0.045783985406160355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960825947692549, "compression_loss": 0.0, "distillation_loss": 0.016983792185783386, "epoch": 5.28, "learning_rate": 7.084739219251129e-06, "loss": 0.0159, "step": 5564, "task_loss": 0.005871200934052467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960867010637117, "compression_loss": 0.0, "distillation_loss": 0.03363943099975586, "epoch": 5.28, "learning_rate": 7.0773090935556365e-06, "loss": 0.0311, "step": 5565, "task_loss": 0.00811656005680561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960908044876377, "compression_loss": 0.0, "distillation_loss": 0.019596152007579803, "epoch": 5.29, "learning_rate": 7.0698822236032554e-06, "loss": 0.018, "step": 5566, "task_loss": 0.0034526288509368896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7960949050420365, "compression_loss": 0.0, "distillation_loss": 0.04396222531795502, "epoch": 5.29, "learning_rate": 7.0624586107431276e-06, "loss": 0.049, "step": 5567, "task_loss": 0.09396674484014511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796099002727912, "compression_loss": 0.0, "distillation_loss": 0.04795515164732933, "epoch": 5.29, "learning_rate": 7.055038256323782e-06, "loss": 0.0508, "step": 5568, "task_loss": 0.07625043392181396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961030975462677, "compression_loss": 0.0, "distillation_loss": 0.03373149782419205, "epoch": 5.29, "learning_rate": 7.047621161693152e-06, "loss": 0.0355, "step": 5569, "task_loss": 0.051260001957416534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961071894981073, "compression_loss": 0.0, "distillation_loss": 0.042289115488529205, "epoch": 5.29, "learning_rate": 7.040207328198601e-06, "loss": 0.0427, "step": 5570, "task_loss": 0.04608525335788727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961112785844346, "compression_loss": 0.0, "distillation_loss": 0.0194284338504076, "epoch": 5.29, "learning_rate": 7.032796757186888e-06, "loss": 0.0219, "step": 5571, "task_loss": 0.04372316598892212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961153648062532, "compression_loss": 0.0, "distillation_loss": 0.028270315378904343, "epoch": 5.29, "learning_rate": 7.025389450004177e-06, "loss": 0.0273, "step": 5572, "task_loss": 0.018285321071743965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961194481645667, "compression_loss": 0.0, "distillation_loss": 0.11180476099252701, "epoch": 5.29, "learning_rate": 7.017985407996031e-06, "loss": 0.1099, "step": 5573, "task_loss": 0.09264665842056274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961235286603789, "compression_loss": 0.0, "distillation_loss": 0.08373094350099564, "epoch": 5.29, "learning_rate": 7.010584632507444e-06, "loss": 0.0796, "step": 5574, "task_loss": 0.04215511679649353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961276062946936, "compression_loss": 0.0, "distillation_loss": 0.08096636831760406, "epoch": 5.29, "learning_rate": 7.0031871248827985e-06, "loss": 0.0826, "step": 5575, "task_loss": 0.0974457859992981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961316810685142, "compression_loss": 0.0, "distillation_loss": 0.12849192321300507, "epoch": 5.3, "learning_rate": 6.99579288646588e-06, "loss": 0.1266, "step": 5576, "task_loss": 0.10947559773921967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961357529828446, "compression_loss": 0.0, "distillation_loss": 0.08771924674510956, "epoch": 5.3, "learning_rate": 6.988401918599896e-06, "loss": 0.1008, "step": 5577, "task_loss": 0.2185194492340088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961398220386883, "compression_loss": 0.0, "distillation_loss": 0.10648892819881439, "epoch": 5.3, "learning_rate": 6.981014222627444e-06, "loss": 0.0988, "step": 5578, "task_loss": 0.029211118817329407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961438882370491, "compression_loss": 0.0, "distillation_loss": 0.020999478176236153, "epoch": 5.3, "learning_rate": 6.973629799890544e-06, "loss": 0.0194, "step": 5579, "task_loss": 0.005250850692391396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961479515789307, "compression_loss": 0.0, "distillation_loss": 0.04771939665079117, "epoch": 5.3, "learning_rate": 6.9662486517306005e-06, "loss": 0.0527, "step": 5580, "task_loss": 0.0980047881603241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961520120653367, "compression_loss": 0.0, "distillation_loss": 0.051039569079875946, "epoch": 5.3, "learning_rate": 6.958870779488447e-06, "loss": 0.0487, "step": 5581, "task_loss": 0.027397800236940384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961560696972708, "compression_loss": 0.0, "distillation_loss": 0.027529723942279816, "epoch": 5.3, "learning_rate": 6.951496184504306e-06, "loss": 0.028, "step": 5582, "task_loss": 0.03213750571012497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961601244757368, "compression_loss": 0.0, "distillation_loss": 0.09078949689865112, "epoch": 5.3, "learning_rate": 6.944124868117796e-06, "loss": 0.0868, "step": 5583, "task_loss": 0.05128881335258484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961641764017382, "compression_loss": 0.0, "distillation_loss": 0.021352669224143028, "epoch": 5.3, "learning_rate": 6.93675683166797e-06, "loss": 0.0196, "step": 5584, "task_loss": 0.00414588488638401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961682254762789, "compression_loss": 0.0, "distillation_loss": 0.033476557582616806, "epoch": 5.3, "learning_rate": 6.92939207649326e-06, "loss": 0.0385, "step": 5585, "task_loss": 0.08368590474128723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961722717003623, "compression_loss": 0.0, "distillation_loss": 0.025415003299713135, "epoch": 5.3, "learning_rate": 6.922030603931506e-06, "loss": 0.0263, "step": 5586, "task_loss": 0.03441638872027397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961763150749924, "compression_loss": 0.0, "distillation_loss": 0.03962051123380661, "epoch": 5.31, "learning_rate": 6.914672415319945e-06, "loss": 0.0514, "step": 5587, "task_loss": 0.1574546843767166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961803556011726, "compression_loss": 0.0, "distillation_loss": 0.029824821278452873, "epoch": 5.31, "learning_rate": 6.907317511995251e-06, "loss": 0.0368, "step": 5588, "task_loss": 0.09920459985733032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961843932799068, "compression_loss": 0.0, "distillation_loss": 0.041083455085754395, "epoch": 5.31, "learning_rate": 6.8999658952934695e-06, "loss": 0.0547, "step": 5589, "task_loss": 0.1776200234889984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961884281121985, "compression_loss": 0.0, "distillation_loss": 0.07152897119522095, "epoch": 5.31, "learning_rate": 6.892617566550044e-06, "loss": 0.0727, "step": 5590, "task_loss": 0.0837155282497406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961924600990515, "compression_loss": 0.0, "distillation_loss": 0.015575017780065536, "epoch": 5.31, "learning_rate": 6.885272527099853e-06, "loss": 0.0203, "step": 5591, "task_loss": 0.06283999979496002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7961964892414695, "compression_loss": 0.0, "distillation_loss": 0.22274234890937805, "epoch": 5.31, "learning_rate": 6.87793077827715e-06, "loss": 0.2181, "step": 5592, "task_loss": 0.17664700746536255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796200515540456, "compression_loss": 0.0, "distillation_loss": 0.09952651709318161, "epoch": 5.31, "learning_rate": 6.8705923214155945e-06, "loss": 0.1033, "step": 5593, "task_loss": 0.13760796189308167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962045389970149, "compression_loss": 0.0, "distillation_loss": 0.013694360852241516, "epoch": 5.31, "learning_rate": 6.863257157848252e-06, "loss": 0.0129, "step": 5594, "task_loss": 0.006061408668756485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962085596121498, "compression_loss": 0.0, "distillation_loss": 0.03154522925615311, "epoch": 5.31, "learning_rate": 6.8559252889076e-06, "loss": 0.0357, "step": 5595, "task_loss": 0.0730682909488678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962125773868644, "compression_loss": 0.0, "distillation_loss": 0.05232825130224228, "epoch": 5.31, "learning_rate": 6.848596715925493e-06, "loss": 0.0489, "step": 5596, "task_loss": 0.018482720479369164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962165923221622, "compression_loss": 0.0, "distillation_loss": 0.020783985033631325, "epoch": 5.32, "learning_rate": 6.8412714402332125e-06, "loss": 0.0192, "step": 5597, "task_loss": 0.0049543604254722595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962206044190472, "compression_loss": 0.0, "distillation_loss": 0.01990017667412758, "epoch": 5.32, "learning_rate": 6.833949463161438e-06, "loss": 0.0335, "step": 5598, "task_loss": 0.1554040163755417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962246136785229, "compression_loss": 0.0, "distillation_loss": 0.027097290381789207, "epoch": 5.32, "learning_rate": 6.826630786040228e-06, "loss": 0.0248, "step": 5599, "task_loss": 0.0043129827827215195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796228620101593, "compression_loss": 0.0, "distillation_loss": 0.04166903346776962, "epoch": 5.32, "learning_rate": 6.819315410199062e-06, "loss": 0.0441, "step": 5600, "task_loss": 0.06640726327896118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962326236892612, "compression_loss": 0.0, "distillation_loss": 0.02872236631810665, "epoch": 5.32, "learning_rate": 6.812003336966802e-06, "loss": 0.0325, "step": 5601, "task_loss": 0.06612066179513931 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962366244425311, "compression_loss": 0.0, "distillation_loss": 0.04668932408094406, "epoch": 5.32, "learning_rate": 6.8046945676717375e-06, "loss": 0.0442, "step": 5602, "task_loss": 0.02129777893424034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962406223624064, "compression_loss": 0.0, "distillation_loss": 0.010439068078994751, "epoch": 5.32, "learning_rate": 6.7973891036415354e-06, "loss": 0.0101, "step": 5603, "task_loss": 0.007036501541733742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796244617449891, "compression_loss": 0.0, "distillation_loss": 0.017626464366912842, "epoch": 5.32, "learning_rate": 6.790086946203253e-06, "loss": 0.0233, "step": 5604, "task_loss": 0.07470369338989258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962486097059883, "compression_loss": 0.0, "distillation_loss": 0.1538507342338562, "epoch": 5.32, "learning_rate": 6.78278809668339e-06, "loss": 0.152, "step": 5605, "task_loss": 0.13536766171455383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962525991317022, "compression_loss": 0.0, "distillation_loss": 0.05390554666519165, "epoch": 5.32, "learning_rate": 6.775492556407806e-06, "loss": 0.054, "step": 5606, "task_loss": 0.05467413738369942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962565857280361, "compression_loss": 0.0, "distillation_loss": 0.06552339345216751, "epoch": 5.32, "learning_rate": 6.768200326701768e-06, "loss": 0.0617, "step": 5607, "task_loss": 0.027765335515141487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962605694959941, "compression_loss": 0.0, "distillation_loss": 0.04036558046936989, "epoch": 5.33, "learning_rate": 6.760911408889939e-06, "loss": 0.0452, "step": 5608, "task_loss": 0.08895239233970642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962645504365795, "compression_loss": 0.0, "distillation_loss": 0.14514732360839844, "epoch": 5.33, "learning_rate": 6.753625804296401e-06, "loss": 0.1502, "step": 5609, "task_loss": 0.19537782669067383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962685285507961, "compression_loss": 0.0, "distillation_loss": 0.04015417769551277, "epoch": 5.33, "learning_rate": 6.746343514244611e-06, "loss": 0.0367, "step": 5610, "task_loss": 0.006075270473957062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962725038396478, "compression_loss": 0.0, "distillation_loss": 0.07076828181743622, "epoch": 5.33, "learning_rate": 6.739064540057424e-06, "loss": 0.0669, "step": 5611, "task_loss": 0.03183208778500557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962764763041379, "compression_loss": 0.0, "distillation_loss": 0.0222453735768795, "epoch": 5.33, "learning_rate": 6.731788883057116e-06, "loss": 0.0287, "step": 5612, "task_loss": 0.08726339787244797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962804459452703, "compression_loss": 0.0, "distillation_loss": 0.03244253247976303, "epoch": 5.33, "learning_rate": 6.724516544565332e-06, "loss": 0.0367, "step": 5613, "task_loss": 0.07509634643793106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962844127640487, "compression_loss": 0.0, "distillation_loss": 0.017709966748952866, "epoch": 5.33, "learning_rate": 6.717247525903142e-06, "loss": 0.0164, "step": 5614, "task_loss": 0.004615955054759979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962883767614767, "compression_loss": 0.0, "distillation_loss": 0.04239179193973541, "epoch": 5.33, "learning_rate": 6.709981828390979e-06, "loss": 0.0433, "step": 5615, "task_loss": 0.051517605781555176 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796292337938558, "compression_loss": 0.0, "distillation_loss": 0.0392778217792511, "epoch": 5.33, "learning_rate": 6.70271945334871e-06, "loss": 0.0433, "step": 5616, "task_loss": 0.07987174391746521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7962962962962964, "compression_loss": 0.0, "distillation_loss": 0.05544783174991608, "epoch": 5.33, "learning_rate": 6.695460402095577e-06, "loss": 0.0553, "step": 5617, "task_loss": 0.0541701577603817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963002518356953, "compression_loss": 0.0, "distillation_loss": 0.025989163666963577, "epoch": 5.34, "learning_rate": 6.688204675950205e-06, "loss": 0.0389, "step": 5618, "task_loss": 0.15557032823562622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963042045577587, "compression_loss": 0.0, "distillation_loss": 0.029921121895313263, "epoch": 5.34, "learning_rate": 6.6809522762306566e-06, "loss": 0.0329, "step": 5619, "task_loss": 0.05976049602031708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963081544634901, "compression_loss": 0.0, "distillation_loss": 0.044836804270744324, "epoch": 5.34, "learning_rate": 6.673703204254347e-06, "loss": 0.0436, "step": 5620, "task_loss": 0.03275785595178604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963121015538933, "compression_loss": 0.0, "distillation_loss": 0.037071071565151215, "epoch": 5.34, "learning_rate": 6.666457461338108e-06, "loss": 0.0357, "step": 5621, "task_loss": 0.023638306185603142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963160458299718, "compression_loss": 0.0, "distillation_loss": 0.0316130630671978, "epoch": 5.34, "learning_rate": 6.659215048798164e-06, "loss": 0.0289, "step": 5622, "task_loss": 0.004098406061530113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963199872927296, "compression_loss": 0.0, "distillation_loss": 0.021150682121515274, "epoch": 5.34, "learning_rate": 6.651975967950147e-06, "loss": 0.0196, "step": 5623, "task_loss": 0.005362769588828087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79632392594317, "compression_loss": 0.0, "distillation_loss": 0.029035231098532677, "epoch": 5.34, "learning_rate": 6.644740220109058e-06, "loss": 0.0267, "step": 5624, "task_loss": 0.0061414651572704315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963278617822969, "compression_loss": 0.0, "distillation_loss": 0.06975620985031128, "epoch": 5.34, "learning_rate": 6.6375078065893e-06, "loss": 0.0782, "step": 5625, "task_loss": 0.15371599793434143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963317948111139, "compression_loss": 0.0, "distillation_loss": 0.06846155226230621, "epoch": 5.34, "learning_rate": 6.630278728704692e-06, "loss": 0.0738, "step": 5626, "task_loss": 0.12186034023761749 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963357250306248, "compression_loss": 0.0, "distillation_loss": 0.05929230898618698, "epoch": 5.34, "learning_rate": 6.6230529877684215e-06, "loss": 0.0548, "step": 5627, "task_loss": 0.014448923990130424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963396524418331, "compression_loss": 0.0, "distillation_loss": 0.032660387456417084, "epoch": 5.34, "learning_rate": 6.615830585093074e-06, "loss": 0.0351, "step": 5628, "task_loss": 0.05667547136545181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963435770457427, "compression_loss": 0.0, "distillation_loss": 0.01413150317966938, "epoch": 5.35, "learning_rate": 6.6086115219906485e-06, "loss": 0.0132, "step": 5629, "task_loss": 0.004492944106459618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963474988433571, "compression_loss": 0.0, "distillation_loss": 0.11651385575532913, "epoch": 5.35, "learning_rate": 6.601395799772503e-06, "loss": 0.1489, "step": 5630, "task_loss": 0.4406876266002655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79635141783568, "compression_loss": 0.0, "distillation_loss": 0.08212954550981522, "epoch": 5.35, "learning_rate": 6.594183419749431e-06, "loss": 0.0788, "step": 5631, "task_loss": 0.048624441027641296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963553340237152, "compression_loss": 0.0, "distillation_loss": 0.024417495355010033, "epoch": 5.35, "learning_rate": 6.586974383231573e-06, "loss": 0.0427, "step": 5632, "task_loss": 0.20715758204460144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963592474084663, "compression_loss": 0.0, "distillation_loss": 0.07218081504106522, "epoch": 5.35, "learning_rate": 6.579768691528504e-06, "loss": 0.0702, "step": 5633, "task_loss": 0.052654679864645004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796363157990937, "compression_loss": 0.0, "distillation_loss": 0.04733956977725029, "epoch": 5.35, "learning_rate": 6.572566345949166e-06, "loss": 0.0439, "step": 5634, "task_loss": 0.012745276093482971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796367065772131, "compression_loss": 0.0, "distillation_loss": 0.02717157080769539, "epoch": 5.35, "learning_rate": 6.565367347801893e-06, "loss": 0.0268, "step": 5635, "task_loss": 0.023610301315784454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796370970753052, "compression_loss": 0.0, "distillation_loss": 0.029316646978259087, "epoch": 5.35, "learning_rate": 6.5581716983944274e-06, "loss": 0.0272, "step": 5636, "task_loss": 0.008220043033361435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963748729347035, "compression_loss": 0.0, "distillation_loss": 0.12085194140672684, "epoch": 5.35, "learning_rate": 6.550979399033894e-06, "loss": 0.1194, "step": 5637, "task_loss": 0.10640271008014679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963787723180894, "compression_loss": 0.0, "distillation_loss": 0.033697567880153656, "epoch": 5.35, "learning_rate": 6.5437904510267935e-06, "loss": 0.0313, "step": 5638, "task_loss": 0.010095924139022827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963826689042134, "compression_loss": 0.0, "distillation_loss": 0.14860667288303375, "epoch": 5.36, "learning_rate": 6.536604855679043e-06, "loss": 0.1594, "step": 5639, "task_loss": 0.2568969428539276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963865626940789, "compression_loss": 0.0, "distillation_loss": 0.030187159776687622, "epoch": 5.36, "learning_rate": 6.529422614295949e-06, "loss": 0.0275, "step": 5640, "task_loss": 0.003732619807124138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79639045368869, "compression_loss": 0.0, "distillation_loss": 0.024757564067840576, "epoch": 5.36, "learning_rate": 6.522243728182195e-06, "loss": 0.0257, "step": 5641, "task_loss": 0.03456170856952667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79639434188905, "compression_loss": 0.0, "distillation_loss": 0.07199036329984665, "epoch": 5.36, "learning_rate": 6.5150681986418466e-06, "loss": 0.0716, "step": 5642, "task_loss": 0.06780923902988434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7963982272961627, "compression_loss": 0.0, "distillation_loss": 0.02806227095425129, "epoch": 5.36, "learning_rate": 6.507896026978394e-06, "loss": 0.0354, "step": 5643, "task_loss": 0.1011197566986084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964021099110319, "compression_loss": 0.0, "distillation_loss": 0.05786515027284622, "epoch": 5.36, "learning_rate": 6.500727214494687e-06, "loss": 0.0632, "step": 5644, "task_loss": 0.11146190017461777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964059897346611, "compression_loss": 0.0, "distillation_loss": 0.05632102116942406, "epoch": 5.36, "learning_rate": 6.493561762492966e-06, "loss": 0.077, "step": 5645, "task_loss": 0.2627117931842804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964098667680543, "compression_loss": 0.0, "distillation_loss": 0.05039035528898239, "epoch": 5.36, "learning_rate": 6.48639967227489e-06, "loss": 0.056, "step": 5646, "task_loss": 0.10663844645023346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964137410122147, "compression_loss": 0.0, "distillation_loss": 0.057316750288009644, "epoch": 5.36, "learning_rate": 6.4792409451414735e-06, "loss": 0.0602, "step": 5647, "task_loss": 0.08620242029428482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964176124681465, "compression_loss": 0.0, "distillation_loss": 0.04628031328320503, "epoch": 5.36, "learning_rate": 6.472085582393128e-06, "loss": 0.0488, "step": 5648, "task_loss": 0.07143149524927139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796421481136853, "compression_loss": 0.0, "distillation_loss": 0.1507255733013153, "epoch": 5.36, "learning_rate": 6.4649335853296685e-06, "loss": 0.1469, "step": 5649, "task_loss": 0.11250782012939453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796425347019338, "compression_loss": 0.0, "distillation_loss": 0.07052607089281082, "epoch": 5.37, "learning_rate": 6.457784955250296e-06, "loss": 0.077, "step": 5650, "task_loss": 0.13528487086296082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964292101166052, "compression_loss": 0.0, "distillation_loss": 0.012097038328647614, "epoch": 5.37, "learning_rate": 6.450639693453589e-06, "loss": 0.0113, "step": 5651, "task_loss": 0.004289904609322548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964330704296583, "compression_loss": 0.0, "distillation_loss": 0.03070550039410591, "epoch": 5.37, "learning_rate": 6.443497801237505e-06, "loss": 0.0397, "step": 5652, "task_loss": 0.12050429731607437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964369279595008, "compression_loss": 0.0, "distillation_loss": 0.02167482301592827, "epoch": 5.37, "learning_rate": 6.436359279899426e-06, "loss": 0.0288, "step": 5653, "task_loss": 0.09340011328458786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964407827071367, "compression_loss": 0.0, "distillation_loss": 0.03893493860960007, "epoch": 5.37, "learning_rate": 6.429224130736084e-06, "loss": 0.0371, "step": 5654, "task_loss": 0.020514240488409996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964446346735695, "compression_loss": 0.0, "distillation_loss": 0.06006263196468353, "epoch": 5.37, "learning_rate": 6.4220923550436106e-06, "loss": 0.0578, "step": 5655, "task_loss": 0.0373433418571949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964484838598029, "compression_loss": 0.0, "distillation_loss": 0.05972398817539215, "epoch": 5.37, "learning_rate": 6.414963954117534e-06, "loss": 0.0573, "step": 5656, "task_loss": 0.035765524953603745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964523302668406, "compression_loss": 0.0, "distillation_loss": 0.11887294799089432, "epoch": 5.37, "learning_rate": 6.407838929252768e-06, "loss": 0.114, "step": 5657, "task_loss": 0.07028041779994965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964561738956862, "compression_loss": 0.0, "distillation_loss": 0.03886423259973526, "epoch": 5.37, "learning_rate": 6.400717281743601e-06, "loss": 0.0367, "step": 5658, "task_loss": 0.017281973734498024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964600147473435, "compression_loss": 0.0, "distillation_loss": 0.023825645446777344, "epoch": 5.37, "learning_rate": 6.393599012883708e-06, "loss": 0.0297, "step": 5659, "task_loss": 0.08249877393245697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964638528228162, "compression_loss": 0.0, "distillation_loss": 0.029711570590734482, "epoch": 5.38, "learning_rate": 6.386484123966171e-06, "loss": 0.0276, "step": 5660, "task_loss": 0.00878220796585083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964676881231078, "compression_loss": 0.0, "distillation_loss": 0.09236393123865128, "epoch": 5.38, "learning_rate": 6.379372616283436e-06, "loss": 0.1032, "step": 5661, "task_loss": 0.20039357244968414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964715206492221, "compression_loss": 0.0, "distillation_loss": 0.045773375779390335, "epoch": 5.38, "learning_rate": 6.372264491127336e-06, "loss": 0.0429, "step": 5662, "task_loss": 0.01664689928293228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964753504021628, "compression_loss": 0.0, "distillation_loss": 0.058652136474847794, "epoch": 5.38, "learning_rate": 6.365159749789112e-06, "loss": 0.0581, "step": 5663, "task_loss": 0.05268477275967598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964791773829336, "compression_loss": 0.0, "distillation_loss": 0.07769237458705902, "epoch": 5.38, "learning_rate": 6.358058393559366e-06, "loss": 0.0717, "step": 5664, "task_loss": 0.017609622329473495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796483001592538, "compression_loss": 0.0, "distillation_loss": 0.01842435821890831, "epoch": 5.38, "learning_rate": 6.350960423728083e-06, "loss": 0.024, "step": 5665, "task_loss": 0.07376164942979813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79648682303198, "compression_loss": 0.0, "distillation_loss": 0.03854244574904442, "epoch": 5.38, "learning_rate": 6.3438658415846565e-06, "loss": 0.0351, "step": 5666, "task_loss": 0.0036998502910137177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796490641702263, "compression_loss": 0.0, "distillation_loss": 0.018844418227672577, "epoch": 5.38, "learning_rate": 6.336774648417854e-06, "loss": 0.0237, "step": 5667, "task_loss": 0.06747782230377197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964944576043909, "compression_loss": 0.0, "distillation_loss": 0.03793220967054367, "epoch": 5.38, "learning_rate": 6.329686845515823e-06, "loss": 0.0457, "step": 5668, "task_loss": 0.11540687829256058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7964982707393672, "compression_loss": 0.0, "distillation_loss": 0.06492698937654495, "epoch": 5.38, "learning_rate": 6.322602434166083e-06, "loss": 0.0729, "step": 5669, "task_loss": 0.14457391202449799 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965020811081956, "compression_loss": 0.0, "distillation_loss": 0.08671444654464722, "epoch": 5.38, "learning_rate": 6.315521415655571e-06, "loss": 0.0834, "step": 5670, "task_loss": 0.05390065908432007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965058887118799, "compression_loss": 0.0, "distillation_loss": 0.03182661160826683, "epoch": 5.39, "learning_rate": 6.308443791270579e-06, "loss": 0.0292, "step": 5671, "task_loss": 0.005619386211037636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965096935514236, "compression_loss": 0.0, "distillation_loss": 0.019694361835718155, "epoch": 5.39, "learning_rate": 6.301369562296786e-06, "loss": 0.0201, "step": 5672, "task_loss": 0.02362089231610298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965134956278307, "compression_loss": 0.0, "distillation_loss": 0.09567753225564957, "epoch": 5.39, "learning_rate": 6.294298730019271e-06, "loss": 0.1003, "step": 5673, "task_loss": 0.14172454178333282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965172949421045, "compression_loss": 0.0, "distillation_loss": 0.07337604463100433, "epoch": 5.39, "learning_rate": 6.28723129572247e-06, "loss": 0.0814, "step": 5674, "task_loss": 0.1531829535961151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796521091495249, "compression_loss": 0.0, "distillation_loss": 0.029669515788555145, "epoch": 5.39, "learning_rate": 6.280167260690237e-06, "loss": 0.0346, "step": 5675, "task_loss": 0.07867026329040527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965248852882676, "compression_loss": 0.0, "distillation_loss": 0.02858271822333336, "epoch": 5.39, "learning_rate": 6.273106626205768e-06, "loss": 0.0355, "step": 5676, "task_loss": 0.09754221141338348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965286763221643, "compression_loss": 0.0, "distillation_loss": 0.058425020426511765, "epoch": 5.39, "learning_rate": 6.266049393551679e-06, "loss": 0.0646, "step": 5677, "task_loss": 0.1198449432849884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965324645979425, "compression_loss": 0.0, "distillation_loss": 0.030206482857465744, "epoch": 5.39, "learning_rate": 6.258995564009939e-06, "loss": 0.0289, "step": 5678, "task_loss": 0.017161451280117035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796536250116606, "compression_loss": 0.0, "distillation_loss": 0.016308438032865524, "epoch": 5.39, "learning_rate": 6.251945138861915e-06, "loss": 0.0206, "step": 5679, "task_loss": 0.0590689480304718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965400328791584, "compression_loss": 0.0, "distillation_loss": 0.06689819693565369, "epoch": 5.39, "learning_rate": 6.244898119388337e-06, "loss": 0.0614, "step": 5680, "task_loss": 0.011653106659650803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965438128866036, "compression_loss": 0.0, "distillation_loss": 0.047350913286209106, "epoch": 5.4, "learning_rate": 6.2378545068693505e-06, "loss": 0.0555, "step": 5681, "task_loss": 0.1290304809808731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796547590139945, "compression_loss": 0.0, "distillation_loss": 0.026393115520477295, "epoch": 5.4, "learning_rate": 6.230814302584445e-06, "loss": 0.0243, "step": 5682, "task_loss": 0.005752213299274445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965513646401865, "compression_loss": 0.0, "distillation_loss": 0.023942215368151665, "epoch": 5.4, "learning_rate": 6.223777507812514e-06, "loss": 0.0303, "step": 5683, "task_loss": 0.08767253160476685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965551363883316, "compression_loss": 0.0, "distillation_loss": 0.17860738933086395, "epoch": 5.4, "learning_rate": 6.216744123831836e-06, "loss": 0.1826, "step": 5684, "task_loss": 0.21895524859428406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965589053853842, "compression_loss": 0.0, "distillation_loss": 0.07975465059280396, "epoch": 5.4, "learning_rate": 6.209714151920046e-06, "loss": 0.0936, "step": 5685, "task_loss": 0.21826988458633423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965626716323478, "compression_loss": 0.0, "distillation_loss": 0.03170043230056763, "epoch": 5.4, "learning_rate": 6.2026875933541785e-06, "loss": 0.0301, "step": 5686, "task_loss": 0.015506980940699577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965664351302263, "compression_loss": 0.0, "distillation_loss": 0.05559496954083443, "epoch": 5.4, "learning_rate": 6.195664449410629e-06, "loss": 0.0656, "step": 5687, "task_loss": 0.15556418895721436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965701958800231, "compression_loss": 0.0, "distillation_loss": 0.0222992654889822, "epoch": 5.4, "learning_rate": 6.188644721365203e-06, "loss": 0.0213, "step": 5688, "task_loss": 0.012770412489771843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796573953882742, "compression_loss": 0.0, "distillation_loss": 0.20726126432418823, "epoch": 5.4, "learning_rate": 6.181628410493059e-06, "loss": 0.1953, "step": 5689, "task_loss": 0.08764246851205826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965777091393867, "compression_loss": 0.0, "distillation_loss": 0.05251915007829666, "epoch": 5.4, "learning_rate": 6.174615518068738e-06, "loss": 0.0573, "step": 5690, "task_loss": 0.10022127628326416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965814616509609, "compression_loss": 0.0, "distillation_loss": 0.021990936249494553, "epoch": 5.4, "learning_rate": 6.16760604536617e-06, "loss": 0.022, "step": 5691, "task_loss": 0.02201257273554802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965852114184683, "compression_loss": 0.0, "distillation_loss": 0.02736847475171089, "epoch": 5.41, "learning_rate": 6.1605999936586725e-06, "loss": 0.0252, "step": 5692, "task_loss": 0.005297476425766945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965889584429124, "compression_loss": 0.0, "distillation_loss": 0.02012113854289055, "epoch": 5.41, "learning_rate": 6.153597364218916e-06, "loss": 0.0185, "step": 5693, "task_loss": 0.004320105537772179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7965927027252971, "compression_loss": 0.0, "distillation_loss": 0.05933302640914917, "epoch": 5.41, "learning_rate": 6.146598158318956e-06, "loss": 0.0612, "step": 5694, "task_loss": 0.07751144468784332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796596444266626, "compression_loss": 0.0, "distillation_loss": 0.1531362384557724, "epoch": 5.41, "learning_rate": 6.1396023772302465e-06, "loss": 0.1486, "step": 5695, "task_loss": 0.10774153470993042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966001830679028, "compression_loss": 0.0, "distillation_loss": 0.032585859298706055, "epoch": 5.41, "learning_rate": 6.132610022223598e-06, "loss": 0.0408, "step": 5696, "task_loss": 0.11497241258621216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966039191301312, "compression_loss": 0.0, "distillation_loss": 0.13292592763900757, "epoch": 5.41, "learning_rate": 6.125621094569198e-06, "loss": 0.1305, "step": 5697, "task_loss": 0.10847350209951401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966076524543149, "compression_loss": 0.0, "distillation_loss": 0.036475587636232376, "epoch": 5.41, "learning_rate": 6.118635595536634e-06, "loss": 0.0353, "step": 5698, "task_loss": 0.024255897849798203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966113830414574, "compression_loss": 0.0, "distillation_loss": 0.018289241939783096, "epoch": 5.41, "learning_rate": 6.111653526394839e-06, "loss": 0.0234, "step": 5699, "task_loss": 0.06973688304424286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966151108925625, "compression_loss": 0.0, "distillation_loss": 0.11107459664344788, "epoch": 5.41, "learning_rate": 6.104674888412157e-06, "loss": 0.114, "step": 5700, "task_loss": 0.1403195559978485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796618836008634, "compression_loss": 0.0, "distillation_loss": 0.03960804641246796, "epoch": 5.41, "learning_rate": 6.097699682856275e-06, "loss": 0.0449, "step": 5701, "task_loss": 0.09255840629339218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966225583906754, "compression_loss": 0.0, "distillation_loss": 0.06171734258532524, "epoch": 5.42, "learning_rate": 6.090727910994287e-06, "loss": 0.0701, "step": 5702, "task_loss": 0.1456369161605835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966262780396905, "compression_loss": 0.0, "distillation_loss": 0.02067572996020317, "epoch": 5.42, "learning_rate": 6.083759574092643e-06, "loss": 0.019, "step": 5703, "task_loss": 0.00421629473567009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796629994956683, "compression_loss": 0.0, "distillation_loss": 0.024341976270079613, "epoch": 5.42, "learning_rate": 6.076794673417166e-06, "loss": 0.0314, "step": 5704, "task_loss": 0.09478569775819778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966337091426565, "compression_loss": 0.0, "distillation_loss": 0.00914972648024559, "epoch": 5.42, "learning_rate": 6.069833210233078e-06, "loss": 0.0086, "step": 5705, "task_loss": 0.0033750738948583603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966374205986146, "compression_loss": 0.0, "distillation_loss": 0.08902280032634735, "epoch": 5.42, "learning_rate": 6.062875185804958e-06, "loss": 0.0931, "step": 5706, "task_loss": 0.13023674488067627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966411293255612, "compression_loss": 0.0, "distillation_loss": 0.01705138012766838, "epoch": 5.42, "learning_rate": 6.055920601396753e-06, "loss": 0.025, "step": 5707, "task_loss": 0.09632694721221924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966448353244998, "compression_loss": 0.0, "distillation_loss": 0.03472287952899933, "epoch": 5.42, "learning_rate": 6.048969458271808e-06, "loss": 0.0329, "step": 5708, "task_loss": 0.01673199236392975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966485385964343, "compression_loss": 0.0, "distillation_loss": 0.08574015647172928, "epoch": 5.42, "learning_rate": 6.0420217576928365e-06, "loss": 0.0943, "step": 5709, "task_loss": 0.17116937041282654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796652239142368, "compression_loss": 0.0, "distillation_loss": 0.02010597288608551, "epoch": 5.42, "learning_rate": 6.035077500921918e-06, "loss": 0.0189, "step": 5710, "task_loss": 0.008190853521227837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796655936963305, "compression_loss": 0.0, "distillation_loss": 0.05432071536779404, "epoch": 5.42, "learning_rate": 6.028136689220498e-06, "loss": 0.0523, "step": 5711, "task_loss": 0.03373948112130165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966596320602487, "compression_loss": 0.0, "distillation_loss": 0.027371380478143692, "epoch": 5.42, "learning_rate": 6.021199323849424e-06, "loss": 0.032, "step": 5712, "task_loss": 0.0741061344742775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796663324434203, "compression_loss": 0.0, "distillation_loss": 0.021952230483293533, "epoch": 5.43, "learning_rate": 6.014265406068897e-06, "loss": 0.0308, "step": 5713, "task_loss": 0.1101214811205864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966670140861714, "compression_loss": 0.0, "distillation_loss": 0.04616188630461693, "epoch": 5.43, "learning_rate": 6.00733493713849e-06, "loss": 0.0514, "step": 5714, "task_loss": 0.09814758598804474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966707010171575, "compression_loss": 0.0, "distillation_loss": 0.0602332204580307, "epoch": 5.43, "learning_rate": 6.000407918317167e-06, "loss": 0.0716, "step": 5715, "task_loss": 0.17372044920921326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966743852281654, "compression_loss": 0.0, "distillation_loss": 0.11245335638523102, "epoch": 5.43, "learning_rate": 5.993484350863246e-06, "loss": 0.1094, "step": 5716, "task_loss": 0.0819602832198143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966780667201983, "compression_loss": 0.0, "distillation_loss": 0.033800143748521805, "epoch": 5.43, "learning_rate": 5.986564236034426e-06, "loss": 0.0379, "step": 5717, "task_loss": 0.07496166974306107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966817454942602, "compression_loss": 0.0, "distillation_loss": 0.03140062838792801, "epoch": 5.43, "learning_rate": 5.9796475750877795e-06, "loss": 0.0294, "step": 5718, "task_loss": 0.011493334546685219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966854215513546, "compression_loss": 0.0, "distillation_loss": 0.14226366579532623, "epoch": 5.43, "learning_rate": 5.9727343692797615e-06, "loss": 0.153, "step": 5719, "task_loss": 0.24922578036785126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966890948924853, "compression_loss": 0.0, "distillation_loss": 0.04390806332230568, "epoch": 5.43, "learning_rate": 5.965824619866184e-06, "loss": 0.0484, "step": 5720, "task_loss": 0.08851758390665054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796692765518656, "compression_loss": 0.0, "distillation_loss": 0.05784032121300697, "epoch": 5.43, "learning_rate": 5.958918328102223e-06, "loss": 0.0751, "step": 5721, "task_loss": 0.2306969314813614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7966964334308702, "compression_loss": 0.0, "distillation_loss": 0.11825370043516159, "epoch": 5.43, "learning_rate": 5.9520154952424606e-06, "loss": 0.1196, "step": 5722, "task_loss": 0.13163554668426514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967000986301317, "compression_loss": 0.0, "distillation_loss": 0.03910898417234421, "epoch": 5.43, "learning_rate": 5.945116122540817e-06, "loss": 0.046, "step": 5723, "task_loss": 0.10799519717693329 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967037611174442, "compression_loss": 0.0, "distillation_loss": 0.03271537274122238, "epoch": 5.44, "learning_rate": 5.938220211250595e-06, "loss": 0.03, "step": 5724, "task_loss": 0.005857875570654869 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967074208938114, "compression_loss": 0.0, "distillation_loss": 0.017977934330701828, "epoch": 5.44, "learning_rate": 5.9313277626244725e-06, "loss": 0.0242, "step": 5725, "task_loss": 0.0803464874625206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796711077960237, "compression_loss": 0.0, "distillation_loss": 0.0364721417427063, "epoch": 5.44, "learning_rate": 5.924438777914504e-06, "loss": 0.0424, "step": 5726, "task_loss": 0.0960368812084198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967147323177246, "compression_loss": 0.0, "distillation_loss": 0.04610713943839073, "epoch": 5.44, "learning_rate": 5.917553258372102e-06, "loss": 0.0538, "step": 5727, "task_loss": 0.12283903360366821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967183839672779, "compression_loss": 0.0, "distillation_loss": 0.06002458930015564, "epoch": 5.44, "learning_rate": 5.910671205248045e-06, "loss": 0.0667, "step": 5728, "task_loss": 0.12689921259880066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967220329099006, "compression_loss": 0.0, "distillation_loss": 0.11154115200042725, "epoch": 5.44, "learning_rate": 5.903792619792506e-06, "loss": 0.1008, "step": 5729, "task_loss": 0.004503896459937096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967256791465963, "compression_loss": 0.0, "distillation_loss": 0.03865112364292145, "epoch": 5.44, "learning_rate": 5.896917503255006e-06, "loss": 0.037, "step": 5730, "task_loss": 0.022370828315615654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967293226783689, "compression_loss": 0.0, "distillation_loss": 0.02259804867208004, "epoch": 5.44, "learning_rate": 5.890045856884435e-06, "loss": 0.0404, "step": 5731, "task_loss": 0.20083081722259521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967329635062219, "compression_loss": 0.0, "distillation_loss": 0.03995134308934212, "epoch": 5.44, "learning_rate": 5.883177681929078e-06, "loss": 0.0678, "step": 5732, "task_loss": 0.31826668977737427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796736601631159, "compression_loss": 0.0, "distillation_loss": 0.038897112011909485, "epoch": 5.44, "learning_rate": 5.876312979636561e-06, "loss": 0.0417, "step": 5733, "task_loss": 0.06685265898704529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967402370541838, "compression_loss": 0.0, "distillation_loss": 0.018638338893651962, "epoch": 5.45, "learning_rate": 5.869451751253885e-06, "loss": 0.0171, "step": 5734, "task_loss": 0.0036374125629663467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967438697763003, "compression_loss": 0.0, "distillation_loss": 0.03524959087371826, "epoch": 5.45, "learning_rate": 5.8625939980274295e-06, "loss": 0.0405, "step": 5735, "task_loss": 0.0877641811966896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967474997985118, "compression_loss": 0.0, "distillation_loss": 0.0975470244884491, "epoch": 5.45, "learning_rate": 5.855739721202952e-06, "loss": 0.0919, "step": 5736, "task_loss": 0.04134809970855713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967511271218222, "compression_loss": 0.0, "distillation_loss": 0.047942765057086945, "epoch": 5.45, "learning_rate": 5.848888922025553e-06, "loss": 0.0523, "step": 5737, "task_loss": 0.09145848453044891 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967547517472352, "compression_loss": 0.0, "distillation_loss": 0.05359815061092377, "epoch": 5.45, "learning_rate": 5.842041601739706e-06, "loss": 0.0587, "step": 5738, "task_loss": 0.10473354160785675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967583736757543, "compression_loss": 0.0, "distillation_loss": 0.07147708535194397, "epoch": 5.45, "learning_rate": 5.835197761589275e-06, "loss": 0.0859, "step": 5739, "task_loss": 0.215956911444664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967619929083833, "compression_loss": 0.0, "distillation_loss": 0.047319263219833374, "epoch": 5.45, "learning_rate": 5.828357402817469e-06, "loss": 0.0604, "step": 5740, "task_loss": 0.17839795351028442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796765609446126, "compression_loss": 0.0, "distillation_loss": 0.12125735729932785, "epoch": 5.45, "learning_rate": 5.821520526666868e-06, "loss": 0.1155, "step": 5741, "task_loss": 0.06390950828790665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967692232899859, "compression_loss": 0.0, "distillation_loss": 0.02730424515902996, "epoch": 5.45, "learning_rate": 5.8146871343794315e-06, "loss": 0.0254, "step": 5742, "task_loss": 0.00804077833890915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967728344409667, "compression_loss": 0.0, "distillation_loss": 0.07533392310142517, "epoch": 5.45, "learning_rate": 5.80785722719647e-06, "loss": 0.0823, "step": 5743, "task_loss": 0.14540576934814453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967764429000722, "compression_loss": 0.0, "distillation_loss": 0.1283230483531952, "epoch": 5.45, "learning_rate": 5.801030806358679e-06, "loss": 0.1246, "step": 5744, "task_loss": 0.09111975133419037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796780048668306, "compression_loss": 0.0, "distillation_loss": 0.07009841501712799, "epoch": 5.46, "learning_rate": 5.7942078731061e-06, "loss": 0.0717, "step": 5745, "task_loss": 0.08628898113965988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967836517466718, "compression_loss": 0.0, "distillation_loss": 0.08589686453342438, "epoch": 5.46, "learning_rate": 5.7873884286781615e-06, "loss": 0.0875, "step": 5746, "task_loss": 0.10212301462888718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967872521361732, "compression_loss": 0.0, "distillation_loss": 0.032520610839128494, "epoch": 5.46, "learning_rate": 5.7805724743136445e-06, "loss": 0.0341, "step": 5747, "task_loss": 0.04851553216576576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796790849837814, "compression_loss": 0.0, "distillation_loss": 0.029095636680722237, "epoch": 5.46, "learning_rate": 5.7737600112506925e-06, "loss": 0.0268, "step": 5748, "task_loss": 0.00580623559653759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967944448525979, "compression_loss": 0.0, "distillation_loss": 0.13918885588645935, "epoch": 5.46, "learning_rate": 5.766951040726837e-06, "loss": 0.134, "step": 5749, "task_loss": 0.0872417539358139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7967980371815284, "compression_loss": 0.0, "distillation_loss": 0.03975678235292435, "epoch": 5.46, "learning_rate": 5.760145563978953e-06, "loss": 0.0369, "step": 5750, "task_loss": 0.011658702045679092 }, { "epoch": 5.46, "eval_accuracy": 0.8853211009174312, "eval_loss": 0.4336629807949066, "eval_runtime": 17.9957, "eval_samples_per_second": 48.456, "eval_steps_per_second": 6.057, "step": 5750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968016268256093, "compression_loss": 0.0, "distillation_loss": 0.015128877013921738, "epoch": 5.46, "learning_rate": 5.753343582243278e-06, "loss": 0.014, "step": 5751, "task_loss": 0.0033825524151325226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968052137858443, "compression_loss": 0.0, "distillation_loss": 0.021273450925946236, "epoch": 5.46, "learning_rate": 5.746545096755437e-06, "loss": 0.024, "step": 5752, "task_loss": 0.04804559051990509 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968087980632371, "compression_loss": 0.0, "distillation_loss": 0.08617687970399857, "epoch": 5.46, "learning_rate": 5.739750108750408e-06, "loss": 0.085, "step": 5753, "task_loss": 0.07489560544490814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968123796587915, "compression_loss": 0.0, "distillation_loss": 0.12841589748859406, "epoch": 5.46, "learning_rate": 5.732958619462533e-06, "loss": 0.1363, "step": 5754, "task_loss": 0.2073012888431549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968159585735108, "compression_loss": 0.0, "distillation_loss": 0.03129749745130539, "epoch": 5.47, "learning_rate": 5.72617063012551e-06, "loss": 0.0372, "step": 5755, "task_loss": 0.09041489660739899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796819534808399, "compression_loss": 0.0, "distillation_loss": 0.04228182137012482, "epoch": 5.47, "learning_rate": 5.719386141972419e-06, "loss": 0.0393, "step": 5756, "task_loss": 0.012629145756363869 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968231083644597, "compression_loss": 0.0, "distillation_loss": 0.05197533220052719, "epoch": 5.47, "learning_rate": 5.712605156235695e-06, "loss": 0.0484, "step": 5757, "task_loss": 0.016697099432349205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968266792426966, "compression_loss": 0.0, "distillation_loss": 0.026457346975803375, "epoch": 5.47, "learning_rate": 5.7058276741471236e-06, "loss": 0.0426, "step": 5758, "task_loss": 0.187742680311203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968302474441132, "compression_loss": 0.0, "distillation_loss": 0.08368387818336487, "epoch": 5.47, "learning_rate": 5.6990536969378865e-06, "loss": 0.0848, "step": 5759, "task_loss": 0.09534468501806259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968338129697136, "compression_loss": 0.0, "distillation_loss": 0.036162346601486206, "epoch": 5.47, "learning_rate": 5.692283225838493e-06, "loss": 0.0487, "step": 5760, "task_loss": 0.1613989919424057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796837375820501, "compression_loss": 0.0, "distillation_loss": 0.05870455503463745, "epoch": 5.47, "learning_rate": 5.6855162620788435e-06, "loss": 0.057, "step": 5761, "task_loss": 0.04194345325231552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968409359974794, "compression_loss": 0.0, "distillation_loss": 0.07059012353420258, "epoch": 5.47, "learning_rate": 5.67875280688818e-06, "loss": 0.0739, "step": 5762, "task_loss": 0.1037897914648056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968444935016523, "compression_loss": 0.0, "distillation_loss": 0.015868360176682472, "epoch": 5.47, "learning_rate": 5.671992861495126e-06, "loss": 0.0187, "step": 5763, "task_loss": 0.04378681629896164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968480483340236, "compression_loss": 0.0, "distillation_loss": 0.02727351151406765, "epoch": 5.47, "learning_rate": 5.665236427127654e-06, "loss": 0.0267, "step": 5764, "task_loss": 0.021995794028043747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968516004955968, "compression_loss": 0.0, "distillation_loss": 0.04122058302164078, "epoch": 5.47, "learning_rate": 5.658483505013096e-06, "loss": 0.0494, "step": 5765, "task_loss": 0.12348385155200958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968551499873756, "compression_loss": 0.0, "distillation_loss": 0.10042732208967209, "epoch": 5.48, "learning_rate": 5.651734096378164e-06, "loss": 0.1009, "step": 5766, "task_loss": 0.10483403503894806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968586968103637, "compression_loss": 0.0, "distillation_loss": 0.062294743955135345, "epoch": 5.48, "learning_rate": 5.644988202448917e-06, "loss": 0.0605, "step": 5767, "task_loss": 0.04418262839317322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968622409655649, "compression_loss": 0.0, "distillation_loss": 0.12338921427726746, "epoch": 5.48, "learning_rate": 5.638245824450777e-06, "loss": 0.1259, "step": 5768, "task_loss": 0.14866846799850464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968657824539827, "compression_loss": 0.0, "distillation_loss": 0.041369691491127014, "epoch": 5.48, "learning_rate": 5.631506963608518e-06, "loss": 0.0491, "step": 5769, "task_loss": 0.11868831515312195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968693212766209, "compression_loss": 0.0, "distillation_loss": 0.07022303342819214, "epoch": 5.48, "learning_rate": 5.624771621146313e-06, "loss": 0.0894, "step": 5770, "task_loss": 0.2618139982223511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968728574344831, "compression_loss": 0.0, "distillation_loss": 0.024889115244150162, "epoch": 5.48, "learning_rate": 5.618039798287652e-06, "loss": 0.0231, "step": 5771, "task_loss": 0.006804602220654488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796876390928573, "compression_loss": 0.0, "distillation_loss": 0.04423792287707329, "epoch": 5.48, "learning_rate": 5.6113114962554035e-06, "loss": 0.0483, "step": 5772, "task_loss": 0.08468769490718842 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968799217598944, "compression_loss": 0.0, "distillation_loss": 0.036272380501031876, "epoch": 5.48, "learning_rate": 5.6045867162718e-06, "loss": 0.0509, "step": 5773, "task_loss": 0.18288281559944153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968834499294508, "compression_loss": 0.0, "distillation_loss": 0.1168503686785698, "epoch": 5.48, "learning_rate": 5.597865459558435e-06, "loss": 0.1163, "step": 5774, "task_loss": 0.11171647161245346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796886975438246, "compression_loss": 0.0, "distillation_loss": 0.06244365870952606, "epoch": 5.48, "learning_rate": 5.591147727336246e-06, "loss": 0.0676, "step": 5775, "task_loss": 0.11397860199213028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968904982872836, "compression_loss": 0.0, "distillation_loss": 0.027232658118009567, "epoch": 5.49, "learning_rate": 5.584433520825541e-06, "loss": 0.0281, "step": 5776, "task_loss": 0.03567790612578392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7968940184775675, "compression_loss": 0.0, "distillation_loss": 0.08206064254045486, "epoch": 5.49, "learning_rate": 5.577722841245995e-06, "loss": 0.0808, "step": 5777, "task_loss": 0.06914728134870529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796897536010101, "compression_loss": 0.0, "distillation_loss": 0.06064610183238983, "epoch": 5.49, "learning_rate": 5.571015689816639e-06, "loss": 0.0668, "step": 5778, "task_loss": 0.1217413991689682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969010508858883, "compression_loss": 0.0, "distillation_loss": 0.045650772750377655, "epoch": 5.49, "learning_rate": 5.564312067755856e-06, "loss": 0.0601, "step": 5779, "task_loss": 0.19037003815174103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969045631059325, "compression_loss": 0.0, "distillation_loss": 0.07006368786096573, "epoch": 5.49, "learning_rate": 5.5576119762813795e-06, "loss": 0.0666, "step": 5780, "task_loss": 0.03572140634059906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969080726712376, "compression_loss": 0.0, "distillation_loss": 0.02565773017704487, "epoch": 5.49, "learning_rate": 5.550915416610331e-06, "loss": 0.0336, "step": 5781, "task_loss": 0.10518058389425278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969115795828073, "compression_loss": 0.0, "distillation_loss": 0.03800772875547409, "epoch": 5.49, "learning_rate": 5.544222389959164e-06, "loss": 0.0355, "step": 5782, "task_loss": 0.012635795399546623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969150838416453, "compression_loss": 0.0, "distillation_loss": 0.03859638050198555, "epoch": 5.49, "learning_rate": 5.537532897543695e-06, "loss": 0.0359, "step": 5783, "task_loss": 0.011166783049702644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969185854487552, "compression_loss": 0.0, "distillation_loss": 0.05814467743039131, "epoch": 5.49, "learning_rate": 5.530846940579112e-06, "loss": 0.0551, "step": 5784, "task_loss": 0.028073014691472054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969220844051406, "compression_loss": 0.0, "distillation_loss": 0.02096547558903694, "epoch": 5.49, "learning_rate": 5.524164520279948e-06, "loss": 0.0194, "step": 5785, "task_loss": 0.005785791203379631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969255807118053, "compression_loss": 0.0, "distillation_loss": 0.03538232296705246, "epoch": 5.49, "learning_rate": 5.5174856378600895e-06, "loss": 0.0328, "step": 5786, "task_loss": 0.00995618849992752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796929074369753, "compression_loss": 0.0, "distillation_loss": 0.017818935215473175, "epoch": 5.5, "learning_rate": 5.510810294532792e-06, "loss": 0.0164, "step": 5787, "task_loss": 0.004038920626044273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969325653799872, "compression_loss": 0.0, "distillation_loss": 0.0632765144109726, "epoch": 5.5, "learning_rate": 5.504138491510674e-06, "loss": 0.067, "step": 5788, "task_loss": 0.1003769114613533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796936053743512, "compression_loss": 0.0, "distillation_loss": 0.03711196035146713, "epoch": 5.5, "learning_rate": 5.497470230005691e-06, "loss": 0.044, "step": 5789, "task_loss": 0.10593129694461823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969395394613306, "compression_loss": 0.0, "distillation_loss": 0.07631154358386993, "epoch": 5.5, "learning_rate": 5.490805511229158e-06, "loss": 0.0861, "step": 5790, "task_loss": 0.173743337392807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969430225344468, "compression_loss": 0.0, "distillation_loss": 0.03230639174580574, "epoch": 5.5, "learning_rate": 5.484144336391769e-06, "loss": 0.0373, "step": 5791, "task_loss": 0.08267946541309357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969465029638645, "compression_loss": 0.0, "distillation_loss": 0.031054720282554626, "epoch": 5.5, "learning_rate": 5.477486706703553e-06, "loss": 0.0289, "step": 5792, "task_loss": 0.009296312928199768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969499807505873, "compression_loss": 0.0, "distillation_loss": 0.09885191917419434, "epoch": 5.5, "learning_rate": 5.470832623373889e-06, "loss": 0.1043, "step": 5793, "task_loss": 0.1528811752796173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969534558956187, "compression_loss": 0.0, "distillation_loss": 0.0922151654958725, "epoch": 5.5, "learning_rate": 5.464182087611538e-06, "loss": 0.0877, "step": 5794, "task_loss": 0.047085706144571304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969569283999626, "compression_loss": 0.0, "distillation_loss": 0.037871234118938446, "epoch": 5.5, "learning_rate": 5.457535100624592e-06, "loss": 0.0475, "step": 5795, "task_loss": 0.133943110704422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969603982646226, "compression_loss": 0.0, "distillation_loss": 0.1495521366596222, "epoch": 5.5, "learning_rate": 5.450891663620519e-06, "loss": 0.1464, "step": 5796, "task_loss": 0.11768987029790878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969638654906023, "compression_loss": 0.0, "distillation_loss": 0.03055592253804207, "epoch": 5.51, "learning_rate": 5.444251777806117e-06, "loss": 0.0286, "step": 5797, "task_loss": 0.011272316798567772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969673300789055, "compression_loss": 0.0, "distillation_loss": 0.1179354190826416, "epoch": 5.51, "learning_rate": 5.43761544438757e-06, "loss": 0.1202, "step": 5798, "task_loss": 0.14022500813007355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969707920305359, "compression_loss": 0.0, "distillation_loss": 0.02442678064107895, "epoch": 5.51, "learning_rate": 5.4309826645703886e-06, "loss": 0.0225, "step": 5799, "task_loss": 0.0055536795407533646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.796974251346497, "compression_loss": 0.0, "distillation_loss": 0.10044455528259277, "epoch": 5.51, "learning_rate": 5.424353439559446e-06, "loss": 0.1063, "step": 5800, "task_loss": 0.15924617648124695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969777080277928, "compression_loss": 0.0, "distillation_loss": 0.028251107782125473, "epoch": 5.51, "learning_rate": 5.417727770558984e-06, "loss": 0.0266, "step": 5801, "task_loss": 0.012232955545186996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969811620754266, "compression_loss": 0.0, "distillation_loss": 0.02132299914956093, "epoch": 5.51, "learning_rate": 5.4111056587725836e-06, "loss": 0.0201, "step": 5802, "task_loss": 0.009178368374705315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969846134904024, "compression_loss": 0.0, "distillation_loss": 0.041062891483306885, "epoch": 5.51, "learning_rate": 5.404487105403172e-06, "loss": 0.0377, "step": 5803, "task_loss": 0.00785096362233162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969880622737237, "compression_loss": 0.0, "distillation_loss": 0.027310824021697044, "epoch": 5.51, "learning_rate": 5.397872111653052e-06, "loss": 0.0253, "step": 5804, "task_loss": 0.006779264658689499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969915084263942, "compression_loss": 0.0, "distillation_loss": 0.17357215285301208, "epoch": 5.51, "learning_rate": 5.3912606787238754e-06, "loss": 0.1662, "step": 5805, "task_loss": 0.09964841604232788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969949519494176, "compression_loss": 0.0, "distillation_loss": 0.03900089114904404, "epoch": 5.51, "learning_rate": 5.384652807816631e-06, "loss": 0.0428, "step": 5806, "task_loss": 0.07701753824949265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7969983928437976, "compression_loss": 0.0, "distillation_loss": 0.026013296097517014, "epoch": 5.51, "learning_rate": 5.378048500131669e-06, "loss": 0.0246, "step": 5807, "task_loss": 0.011500442400574684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970018311105379, "compression_loss": 0.0, "distillation_loss": 0.020743299275636673, "epoch": 5.52, "learning_rate": 5.3714477568687025e-06, "loss": 0.019, "step": 5808, "task_loss": 0.0032861437648534775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970052667506422, "compression_loss": 0.0, "distillation_loss": 0.11350180208683014, "epoch": 5.52, "learning_rate": 5.3648505792267825e-06, "loss": 0.1343, "step": 5809, "task_loss": 0.3215253949165344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970086997651141, "compression_loss": 0.0, "distillation_loss": 0.03125522658228874, "epoch": 5.52, "learning_rate": 5.358256968404312e-06, "loss": 0.0371, "step": 5810, "task_loss": 0.08946846425533295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970121301549574, "compression_loss": 0.0, "distillation_loss": 0.03678523749113083, "epoch": 5.52, "learning_rate": 5.351666925599067e-06, "loss": 0.0436, "step": 5811, "task_loss": 0.10449925065040588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970155579211756, "compression_loss": 0.0, "distillation_loss": 0.013445551507174969, "epoch": 5.52, "learning_rate": 5.345080452008145e-06, "loss": 0.0124, "step": 5812, "task_loss": 0.002551089972257614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970189830647726, "compression_loss": 0.0, "distillation_loss": 0.09279379993677139, "epoch": 5.52, "learning_rate": 5.338497548828025e-06, "loss": 0.0901, "step": 5813, "task_loss": 0.06628113985061646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797022405586752, "compression_loss": 0.0, "distillation_loss": 0.10782104730606079, "epoch": 5.52, "learning_rate": 5.331918217254508e-06, "loss": 0.1014, "step": 5814, "task_loss": 0.043450452387332916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970258254881173, "compression_loss": 0.0, "distillation_loss": 0.022084686905145645, "epoch": 5.52, "learning_rate": 5.325342458482779e-06, "loss": 0.0385, "step": 5815, "task_loss": 0.18579933047294617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970292427698724, "compression_loss": 0.0, "distillation_loss": 0.12914645671844482, "epoch": 5.52, "learning_rate": 5.3187702737073435e-06, "loss": 0.1435, "step": 5816, "task_loss": 0.2728707790374756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797032657433021, "compression_loss": 0.0, "distillation_loss": 0.03833978995680809, "epoch": 5.52, "learning_rate": 5.312201664122068e-06, "loss": 0.0571, "step": 5817, "task_loss": 0.22600466012954712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970360694785666, "compression_loss": 0.0, "distillation_loss": 0.02015780098736286, "epoch": 5.53, "learning_rate": 5.305636630920186e-06, "loss": 0.0231, "step": 5818, "task_loss": 0.04994331672787666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797039478907513, "compression_loss": 0.0, "distillation_loss": 0.12706497311592102, "epoch": 5.53, "learning_rate": 5.299075175294258e-06, "loss": 0.1271, "step": 5819, "task_loss": 0.1269303411245346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970428857208639, "compression_loss": 0.0, "distillation_loss": 0.03767715394496918, "epoch": 5.53, "learning_rate": 5.2925172984361944e-06, "loss": 0.0446, "step": 5820, "task_loss": 0.10687967389822006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797046289919623, "compression_loss": 0.0, "distillation_loss": 0.16567179560661316, "epoch": 5.53, "learning_rate": 5.2859630015372804e-06, "loss": 0.1592, "step": 5821, "task_loss": 0.10095572471618652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970496915047939, "compression_loss": 0.0, "distillation_loss": 0.1395283043384552, "epoch": 5.53, "learning_rate": 5.279412285788138e-06, "loss": 0.1345, "step": 5822, "task_loss": 0.08924393355846405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970530904773803, "compression_loss": 0.0, "distillation_loss": 0.034282200038433075, "epoch": 5.53, "learning_rate": 5.2728651523787285e-06, "loss": 0.0354, "step": 5823, "task_loss": 0.045323435217142105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970564868383858, "compression_loss": 0.0, "distillation_loss": 0.045332517474889755, "epoch": 5.53, "learning_rate": 5.266321602498361e-06, "loss": 0.0573, "step": 5824, "task_loss": 0.16505207121372223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970598805888143, "compression_loss": 0.0, "distillation_loss": 0.036005791276693344, "epoch": 5.53, "learning_rate": 5.2597816373357226e-06, "loss": 0.0365, "step": 5825, "task_loss": 0.04140361398458481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970632717296693, "compression_loss": 0.0, "distillation_loss": 0.06637313961982727, "epoch": 5.53, "learning_rate": 5.25324525807882e-06, "loss": 0.0766, "step": 5826, "task_loss": 0.16905876994132996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970666602619545, "compression_loss": 0.0, "distillation_loss": 0.06400737166404724, "epoch": 5.53, "learning_rate": 5.246712465915011e-06, "loss": 0.0715, "step": 5827, "task_loss": 0.13885146379470825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970700461866737, "compression_loss": 0.0, "distillation_loss": 0.07488502562046051, "epoch": 5.53, "learning_rate": 5.240183262031021e-06, "loss": 0.0757, "step": 5828, "task_loss": 0.08303892612457275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970734295048304, "compression_loss": 0.0, "distillation_loss": 0.09977598488330841, "epoch": 5.54, "learning_rate": 5.233657647612899e-06, "loss": 0.1021, "step": 5829, "task_loss": 0.12288139760494232 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970768102174285, "compression_loss": 0.0, "distillation_loss": 0.030375579372048378, "epoch": 5.54, "learning_rate": 5.227135623846069e-06, "loss": 0.0348, "step": 5830, "task_loss": 0.07510203868150711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970801883254715, "compression_loss": 0.0, "distillation_loss": 0.024371018633246422, "epoch": 5.54, "learning_rate": 5.220617191915272e-06, "loss": 0.0227, "step": 5831, "task_loss": 0.008041396737098694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970835638299631, "compression_loss": 0.0, "distillation_loss": 0.02607511542737484, "epoch": 5.54, "learning_rate": 5.214102353004627e-06, "loss": 0.0242, "step": 5832, "task_loss": 0.0076834335923194885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970869367319072, "compression_loss": 0.0, "distillation_loss": 0.13398540019989014, "epoch": 5.54, "learning_rate": 5.207591108297582e-06, "loss": 0.1458, "step": 5833, "task_loss": 0.2525257170200348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7970903070323072, "compression_loss": 0.0, "distillation_loss": 0.0372280478477478, "epoch": 5.54, "learning_rate": 5.201083458976925e-06, "loss": 0.0347, "step": 5834, "task_loss": 0.011982131749391556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797093674732167, "compression_loss": 0.0, "distillation_loss": 0.08168113976716995, "epoch": 5.54, "learning_rate": 5.194579406224817e-06, "loss": 0.0761, "step": 5835, "task_loss": 0.026058735325932503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79709703983249, "compression_loss": 0.0, "distillation_loss": 0.06715555489063263, "epoch": 5.54, "learning_rate": 5.188078951222744e-06, "loss": 0.0648, "step": 5836, "task_loss": 0.043633393943309784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971004023342801, "compression_loss": 0.0, "distillation_loss": 0.3370836079120636, "epoch": 5.54, "learning_rate": 5.181582095151538e-06, "loss": 0.3367, "step": 5837, "task_loss": 0.3332991600036621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971037622385411, "compression_loss": 0.0, "distillation_loss": 0.016430668532848358, "epoch": 5.54, "learning_rate": 5.175088839191392e-06, "loss": 0.019, "step": 5838, "task_loss": 0.04250016063451767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971071195462764, "compression_loss": 0.0, "distillation_loss": 0.029836127534508705, "epoch": 5.55, "learning_rate": 5.168599184521841e-06, "loss": 0.0381, "step": 5839, "task_loss": 0.11209283769130707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971104742584899, "compression_loss": 0.0, "distillation_loss": 0.0611884780228138, "epoch": 5.55, "learning_rate": 5.162113132321758e-06, "loss": 0.0694, "step": 5840, "task_loss": 0.14316709339618683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971138263761851, "compression_loss": 0.0, "distillation_loss": 0.06297457218170166, "epoch": 5.55, "learning_rate": 5.155630683769358e-06, "loss": 0.0591, "step": 5841, "task_loss": 0.02457287907600403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971171759003658, "compression_loss": 0.0, "distillation_loss": 0.04800242558121681, "epoch": 5.55, "learning_rate": 5.149151840042224e-06, "loss": 0.0468, "step": 5842, "task_loss": 0.035977013409137726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971205228320356, "compression_loss": 0.0, "distillation_loss": 0.09977774322032928, "epoch": 5.55, "learning_rate": 5.142676602317259e-06, "loss": 0.0959, "step": 5843, "task_loss": 0.060707636177539825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971238671721983, "compression_loss": 0.0, "distillation_loss": 0.04978405311703682, "epoch": 5.55, "learning_rate": 5.1362049717707165e-06, "loss": 0.0504, "step": 5844, "task_loss": 0.055560655891895294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971272089218575, "compression_loss": 0.0, "distillation_loss": 0.07310109585523605, "epoch": 5.55, "learning_rate": 5.129736949578215e-06, "loss": 0.0786, "step": 5845, "task_loss": 0.1284613013267517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797130548082017, "compression_loss": 0.0, "distillation_loss": 0.029148953035473824, "epoch": 5.55, "learning_rate": 5.123272536914689e-06, "loss": 0.0419, "step": 5846, "task_loss": 0.1563912034034729 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971338846536803, "compression_loss": 0.0, "distillation_loss": 0.06112636625766754, "epoch": 5.55, "learning_rate": 5.116811734954429e-06, "loss": 0.0628, "step": 5847, "task_loss": 0.07760436087846756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971372186378511, "compression_loss": 0.0, "distillation_loss": 0.087627112865448, "epoch": 5.55, "learning_rate": 5.1103545448710765e-06, "loss": 0.0942, "step": 5848, "task_loss": 0.15320152044296265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971405500355332, "compression_loss": 0.0, "distillation_loss": 0.04075030982494354, "epoch": 5.55, "learning_rate": 5.103900967837618e-06, "loss": 0.0486, "step": 5849, "task_loss": 0.11899574100971222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971438788477302, "compression_loss": 0.0, "distillation_loss": 0.12920604646205902, "epoch": 5.56, "learning_rate": 5.097451005026369e-06, "loss": 0.1303, "step": 5850, "task_loss": 0.13977056741714478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971472050754459, "compression_loss": 0.0, "distillation_loss": 0.03929203748703003, "epoch": 5.56, "learning_rate": 5.091004657608989e-06, "loss": 0.0456, "step": 5851, "task_loss": 0.10265965014696121 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971505287196838, "compression_loss": 0.0, "distillation_loss": 0.029239453375339508, "epoch": 5.56, "learning_rate": 5.084561926756501e-06, "loss": 0.0311, "step": 5852, "task_loss": 0.047469861805438995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971538497814477, "compression_loss": 0.0, "distillation_loss": 0.014326438307762146, "epoch": 5.56, "learning_rate": 5.078122813639255e-06, "loss": 0.0133, "step": 5853, "task_loss": 0.004335761070251465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971571682617412, "compression_loss": 0.0, "distillation_loss": 0.06414780765771866, "epoch": 5.56, "learning_rate": 5.071687319426946e-06, "loss": 0.0762, "step": 5854, "task_loss": 0.1843860000371933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971604841615682, "compression_loss": 0.0, "distillation_loss": 0.033587224781513214, "epoch": 5.56, "learning_rate": 5.065255445288594e-06, "loss": 0.0479, "step": 5855, "task_loss": 0.1766654998064041 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971637974819321, "compression_loss": 0.0, "distillation_loss": 0.09691178798675537, "epoch": 5.56, "learning_rate": 5.058827192392613e-06, "loss": 0.0928, "step": 5856, "task_loss": 0.05549832805991173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971671082238367, "compression_loss": 0.0, "distillation_loss": 0.057575710117816925, "epoch": 5.56, "learning_rate": 5.05240256190671e-06, "loss": 0.0648, "step": 5857, "task_loss": 0.12961743772029877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971704163882857, "compression_loss": 0.0, "distillation_loss": 0.08201977610588074, "epoch": 5.56, "learning_rate": 5.045981554997945e-06, "loss": 0.0782, "step": 5858, "task_loss": 0.043463174253702164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971737219762828, "compression_loss": 0.0, "distillation_loss": 0.015410145744681358, "epoch": 5.56, "learning_rate": 5.039564172832733e-06, "loss": 0.0147, "step": 5859, "task_loss": 0.008038915693759918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971770249888316, "compression_loss": 0.0, "distillation_loss": 0.022354498505592346, "epoch": 5.57, "learning_rate": 5.0331504165768236e-06, "loss": 0.0289, "step": 5860, "task_loss": 0.08798445761203766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971803254269358, "compression_loss": 0.0, "distillation_loss": 0.03102359175682068, "epoch": 5.57, "learning_rate": 5.026740287395298e-06, "loss": 0.0288, "step": 5861, "task_loss": 0.00922648049890995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971836232915992, "compression_loss": 0.0, "distillation_loss": 0.04426976293325424, "epoch": 5.57, "learning_rate": 5.020333786452589e-06, "loss": 0.0411, "step": 5862, "task_loss": 0.012406604364514351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971869185838254, "compression_loss": 0.0, "distillation_loss": 0.026221610605716705, "epoch": 5.57, "learning_rate": 5.013930914912476e-06, "loss": 0.0242, "step": 5863, "task_loss": 0.00634673610329628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971902113046181, "compression_loss": 0.0, "distillation_loss": 0.09950605034828186, "epoch": 5.57, "learning_rate": 5.007531673938059e-06, "loss": 0.1281, "step": 5864, "task_loss": 0.38576656579971313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971935014549808, "compression_loss": 0.0, "distillation_loss": 0.03954896330833435, "epoch": 5.57, "learning_rate": 5.0011360646917996e-06, "loss": 0.043, "step": 5865, "task_loss": 0.07363829016685486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7971967890359175, "compression_loss": 0.0, "distillation_loss": 0.25650808215141296, "epoch": 5.57, "learning_rate": 4.994744088335496e-06, "loss": 0.2497, "step": 5866, "task_loss": 0.1884750872850418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972000740484316, "compression_loss": 0.0, "distillation_loss": 0.022899843752384186, "epoch": 5.57, "learning_rate": 4.9883557460302735e-06, "loss": 0.0278, "step": 5867, "task_loss": 0.07162778824567795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797203356493527, "compression_loss": 0.0, "distillation_loss": 0.0954747349023819, "epoch": 5.57, "learning_rate": 4.9819710389366085e-06, "loss": 0.091, "step": 5868, "task_loss": 0.05076561123132706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972066363722073, "compression_loss": 0.0, "distillation_loss": 0.026118488982319832, "epoch": 5.57, "learning_rate": 4.975589968214303e-06, "loss": 0.0272, "step": 5869, "task_loss": 0.0369403176009655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972099136854761, "compression_loss": 0.0, "distillation_loss": 0.027349824085831642, "epoch": 5.57, "learning_rate": 4.969212535022527e-06, "loss": 0.0296, "step": 5870, "task_loss": 0.05007128417491913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972131884343372, "compression_loss": 0.0, "distillation_loss": 0.03833641856908798, "epoch": 5.58, "learning_rate": 4.962838740519763e-06, "loss": 0.0398, "step": 5871, "task_loss": 0.052747875452041626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972164606197942, "compression_loss": 0.0, "distillation_loss": 0.019336001947522163, "epoch": 5.58, "learning_rate": 4.956468585863835e-06, "loss": 0.0207, "step": 5872, "task_loss": 0.03255251795053482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972197302428509, "compression_loss": 0.0, "distillation_loss": 0.02944711409509182, "epoch": 5.58, "learning_rate": 4.950102072211921e-06, "loss": 0.036, "step": 5873, "task_loss": 0.09525460749864578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972229973045108, "compression_loss": 0.0, "distillation_loss": 0.01285221055150032, "epoch": 5.58, "learning_rate": 4.943739200720532e-06, "loss": 0.0123, "step": 5874, "task_loss": 0.006926748901605606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972262618057777, "compression_loss": 0.0, "distillation_loss": 0.12414573132991791, "epoch": 5.58, "learning_rate": 4.937379972545508e-06, "loss": 0.1184, "step": 5875, "task_loss": 0.06664574146270752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972295237476553, "compression_loss": 0.0, "distillation_loss": 0.08365512639284134, "epoch": 5.58, "learning_rate": 4.9310243888420285e-06, "loss": 0.0858, "step": 5876, "task_loss": 0.1054966151714325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972327831311472, "compression_loss": 0.0, "distillation_loss": 0.03933534771203995, "epoch": 5.58, "learning_rate": 4.9246724507646305e-06, "loss": 0.0408, "step": 5877, "task_loss": 0.0535866804420948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972360399572572, "compression_loss": 0.0, "distillation_loss": 0.015180066227912903, "epoch": 5.58, "learning_rate": 4.918324159467163e-06, "loss": 0.0153, "step": 5878, "task_loss": 0.016459671780467033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972392942269888, "compression_loss": 0.0, "distillation_loss": 0.05211641639471054, "epoch": 5.58, "learning_rate": 4.911979516102822e-06, "loss": 0.0481, "step": 5879, "task_loss": 0.012342775240540504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972425459413459, "compression_loss": 0.0, "distillation_loss": 0.027883626520633698, "epoch": 5.58, "learning_rate": 4.905638521824155e-06, "loss": 0.0401, "step": 5880, "task_loss": 0.1499495804309845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797245795101332, "compression_loss": 0.0, "distillation_loss": 0.10857439041137695, "epoch": 5.58, "learning_rate": 4.899301177783017e-06, "loss": 0.1129, "step": 5881, "task_loss": 0.15197938680648804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972490417079509, "compression_loss": 0.0, "distillation_loss": 0.027678653597831726, "epoch": 5.59, "learning_rate": 4.892967485130631e-06, "loss": 0.0368, "step": 5882, "task_loss": 0.11922503262758255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972522857622062, "compression_loss": 0.0, "distillation_loss": 0.02238599956035614, "epoch": 5.59, "learning_rate": 4.886637445017534e-06, "loss": 0.0206, "step": 5883, "task_loss": 0.0042944010347127914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972555272651016, "compression_loss": 0.0, "distillation_loss": 0.04405056685209274, "epoch": 5.59, "learning_rate": 4.880311058593617e-06, "loss": 0.0569, "step": 5884, "task_loss": 0.17205776274204254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972587662176409, "compression_loss": 0.0, "distillation_loss": 0.038423389196395874, "epoch": 5.59, "learning_rate": 4.873988327008094e-06, "loss": 0.0371, "step": 5885, "task_loss": 0.02501399628818035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972620026208276, "compression_loss": 0.0, "distillation_loss": 0.041545622050762177, "epoch": 5.59, "learning_rate": 4.867669251409512e-06, "loss": 0.0476, "step": 5886, "task_loss": 0.10194090753793716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972652364756655, "compression_loss": 0.0, "distillation_loss": 0.018506459891796112, "epoch": 5.59, "learning_rate": 4.861353832945778e-06, "loss": 0.017, "step": 5887, "task_loss": 0.003717266023159027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972684677831583, "compression_loss": 0.0, "distillation_loss": 0.020966939628124237, "epoch": 5.59, "learning_rate": 4.855042072764107e-06, "loss": 0.0301, "step": 5888, "task_loss": 0.11239578574895859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972716965443095, "compression_loss": 0.0, "distillation_loss": 0.024587448686361313, "epoch": 5.59, "learning_rate": 4.848733972011058e-06, "loss": 0.0317, "step": 5889, "task_loss": 0.09537127614021301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797274922760123, "compression_loss": 0.0, "distillation_loss": 0.03539607301354408, "epoch": 5.59, "learning_rate": 4.842429531832529e-06, "loss": 0.0323, "step": 5890, "task_loss": 0.004601247608661652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972781464316023, "compression_loss": 0.0, "distillation_loss": 0.054549772292375565, "epoch": 5.59, "learning_rate": 4.8361287533737674e-06, "loss": 0.0532, "step": 5891, "task_loss": 0.04105132073163986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972813675597513, "compression_loss": 0.0, "distillation_loss": 0.02551000751554966, "epoch": 5.6, "learning_rate": 4.829831637779322e-06, "loss": 0.0288, "step": 5892, "task_loss": 0.058242082595825195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972845861455735, "compression_loss": 0.0, "distillation_loss": 0.025515582412481308, "epoch": 5.6, "learning_rate": 4.823538186193097e-06, "loss": 0.0285, "step": 5893, "task_loss": 0.0552404448390007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972878021900727, "compression_loss": 0.0, "distillation_loss": 0.027075637131929398, "epoch": 5.6, "learning_rate": 4.817248399758337e-06, "loss": 0.025, "step": 5894, "task_loss": 0.006751839071512222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972910156942524, "compression_loss": 0.0, "distillation_loss": 0.1096268743276596, "epoch": 5.6, "learning_rate": 4.810962279617609e-06, "loss": 0.1301, "step": 5895, "task_loss": 0.31454721093177795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972942266591165, "compression_loss": 0.0, "distillation_loss": 0.04903785511851311, "epoch": 5.6, "learning_rate": 4.804679826912803e-06, "loss": 0.0731, "step": 5896, "task_loss": 0.2901209592819214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7972974350856685, "compression_loss": 0.0, "distillation_loss": 0.0792558565735817, "epoch": 5.6, "learning_rate": 4.798401042785177e-06, "loss": 0.0772, "step": 5897, "task_loss": 0.05906623229384422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973006409749123, "compression_loss": 0.0, "distillation_loss": 0.05788847804069519, "epoch": 5.6, "learning_rate": 4.792125928375296e-06, "loss": 0.0816, "step": 5898, "task_loss": 0.2952689230442047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973038443278514, "compression_loss": 0.0, "distillation_loss": 0.02830476313829422, "epoch": 5.6, "learning_rate": 4.785854484823052e-06, "loss": 0.026, "step": 5899, "task_loss": 0.00540274940431118 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973070451454894, "compression_loss": 0.0, "distillation_loss": 0.018592754378914833, "epoch": 5.6, "learning_rate": 4.779586713267695e-06, "loss": 0.0171, "step": 5900, "task_loss": 0.0032580215483903885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973102434288303, "compression_loss": 0.0, "distillation_loss": 0.05436631664633751, "epoch": 5.6, "learning_rate": 4.7733226148478e-06, "loss": 0.0595, "step": 5901, "task_loss": 0.10618877410888672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973134391788775, "compression_loss": 0.0, "distillation_loss": 0.01007351279258728, "epoch": 5.6, "learning_rate": 4.767062190701266e-06, "loss": 0.0095, "step": 5902, "task_loss": 0.004046119749546051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973166323966349, "compression_loss": 0.0, "distillation_loss": 0.016038384288549423, "epoch": 5.61, "learning_rate": 4.760805441965321e-06, "loss": 0.0263, "step": 5903, "task_loss": 0.11890155076980591 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973198230831059, "compression_loss": 0.0, "distillation_loss": 0.07718884944915771, "epoch": 5.61, "learning_rate": 4.754552369776547e-06, "loss": 0.0741, "step": 5904, "task_loss": 0.046534955501556396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973230112392944, "compression_loss": 0.0, "distillation_loss": 0.05940188467502594, "epoch": 5.61, "learning_rate": 4.748302975270838e-06, "loss": 0.062, "step": 5905, "task_loss": 0.08516646176576614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973261968662041, "compression_loss": 0.0, "distillation_loss": 0.0312301404774189, "epoch": 5.61, "learning_rate": 4.7420572595834185e-06, "loss": 0.0287, "step": 5906, "task_loss": 0.006120791658759117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973293799648385, "compression_loss": 0.0, "distillation_loss": 0.013178318738937378, "epoch": 5.61, "learning_rate": 4.735815223848864e-06, "loss": 0.0122, "step": 5907, "task_loss": 0.003270508721470833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973325605362014, "compression_loss": 0.0, "distillation_loss": 0.05070923641324043, "epoch": 5.61, "learning_rate": 4.7295768692010715e-06, "loss": 0.0471, "step": 5908, "task_loss": 0.014933045953512192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973357385812966, "compression_loss": 0.0, "distillation_loss": 0.030595645308494568, "epoch": 5.61, "learning_rate": 4.723342196773267e-06, "loss": 0.0302, "step": 5909, "task_loss": 0.026463741436600685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973389141011274, "compression_loss": 0.0, "distillation_loss": 0.03401756286621094, "epoch": 5.61, "learning_rate": 4.7171112076979965e-06, "loss": 0.0317, "step": 5910, "task_loss": 0.010913487523794174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797342087096698, "compression_loss": 0.0, "distillation_loss": 0.05812390148639679, "epoch": 5.61, "learning_rate": 4.710883903107166e-06, "loss": 0.0634, "step": 5911, "task_loss": 0.11132801324129105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973452575690118, "compression_loss": 0.0, "distillation_loss": 0.12190339714288712, "epoch": 5.61, "learning_rate": 4.70466028413199e-06, "loss": 0.1407, "step": 5912, "task_loss": 0.3099902868270874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973484255190724, "compression_loss": 0.0, "distillation_loss": 0.01616412214934826, "epoch": 5.62, "learning_rate": 4.6984403519030076e-06, "loss": 0.0148, "step": 5913, "task_loss": 0.002981981262564659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973515909478835, "compression_loss": 0.0, "distillation_loss": 0.0333518460392952, "epoch": 5.62, "learning_rate": 4.692224107550117e-06, "loss": 0.0307, "step": 5914, "task_loss": 0.007058465853333473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973547538564489, "compression_loss": 0.0, "distillation_loss": 0.008787909522652626, "epoch": 5.62, "learning_rate": 4.686011552202518e-06, "loss": 0.0082, "step": 5915, "task_loss": 0.003183361142873764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973579142457724, "compression_loss": 0.0, "distillation_loss": 0.04432186111807823, "epoch": 5.62, "learning_rate": 4.679802686988749e-06, "loss": 0.0479, "step": 5916, "task_loss": 0.0803055539727211 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973610721168575, "compression_loss": 0.0, "distillation_loss": 0.018226707354187965, "epoch": 5.62, "learning_rate": 4.673597513036684e-06, "loss": 0.0243, "step": 5917, "task_loss": 0.0785667896270752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973642274707078, "compression_loss": 0.0, "distillation_loss": 0.033110909163951874, "epoch": 5.62, "learning_rate": 4.667396031473534e-06, "loss": 0.0325, "step": 5918, "task_loss": 0.02661288157105446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973673803083271, "compression_loss": 0.0, "distillation_loss": 0.06958150863647461, "epoch": 5.62, "learning_rate": 4.661198243425813e-06, "loss": 0.0723, "step": 5919, "task_loss": 0.0964449942111969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973705306307192, "compression_loss": 0.0, "distillation_loss": 0.04093363136053085, "epoch": 5.62, "learning_rate": 4.655004150019379e-06, "loss": 0.0406, "step": 5920, "task_loss": 0.03757309168577194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973736784388875, "compression_loss": 0.0, "distillation_loss": 0.0374174565076828, "epoch": 5.62, "learning_rate": 4.648813752379433e-06, "loss": 0.0439, "step": 5921, "task_loss": 0.10244208574295044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973768237338359, "compression_loss": 0.0, "distillation_loss": 0.017319556325674057, "epoch": 5.62, "learning_rate": 4.642627051630477e-06, "loss": 0.0167, "step": 5922, "task_loss": 0.011339414864778519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973799665165681, "compression_loss": 0.0, "distillation_loss": 0.028723616153001785, "epoch": 5.62, "learning_rate": 4.636444048896355e-06, "loss": 0.0353, "step": 5923, "task_loss": 0.09466679394245148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973831067880875, "compression_loss": 0.0, "distillation_loss": 0.08423987776041031, "epoch": 5.63, "learning_rate": 4.63026474530025e-06, "loss": 0.0834, "step": 5924, "task_loss": 0.07615604996681213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973862445493982, "compression_loss": 0.0, "distillation_loss": 0.026835883036255836, "epoch": 5.63, "learning_rate": 4.624089141964649e-06, "loss": 0.0268, "step": 5925, "task_loss": 0.025994790717959404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973893798015036, "compression_loss": 0.0, "distillation_loss": 0.05702338367700577, "epoch": 5.63, "learning_rate": 4.617917240011394e-06, "loss": 0.0545, "step": 5926, "task_loss": 0.032026879489421844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973925125454074, "compression_loss": 0.0, "distillation_loss": 0.026394031941890717, "epoch": 5.63, "learning_rate": 4.611749040561625e-06, "loss": 0.0242, "step": 5927, "task_loss": 0.004607599228620529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973956427821134, "compression_loss": 0.0, "distillation_loss": 0.1339588314294815, "epoch": 5.63, "learning_rate": 4.6055845447358415e-06, "loss": 0.1297, "step": 5928, "task_loss": 0.09128076583147049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7973987705126252, "compression_loss": 0.0, "distillation_loss": 0.06346623599529266, "epoch": 5.63, "learning_rate": 4.599423753653845e-06, "loss": 0.0697, "step": 5929, "task_loss": 0.12597879767417908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974018957379464, "compression_loss": 0.0, "distillation_loss": 0.06505750119686127, "epoch": 5.63, "learning_rate": 4.593266668434767e-06, "loss": 0.0716, "step": 5930, "task_loss": 0.13001671433448792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974050184590808, "compression_loss": 0.0, "distillation_loss": 0.05653805285692215, "epoch": 5.63, "learning_rate": 4.587113290197087e-06, "loss": 0.063, "step": 5931, "task_loss": 0.12097219377756119 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974081386770322, "compression_loss": 0.0, "distillation_loss": 0.07613807171583176, "epoch": 5.63, "learning_rate": 4.580963620058587e-06, "loss": 0.0749, "step": 5932, "task_loss": 0.06361458450555801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974112563928041, "compression_loss": 0.0, "distillation_loss": 0.16531898081302643, "epoch": 5.63, "learning_rate": 4.5748176591363795e-06, "loss": 0.1618, "step": 5933, "task_loss": 0.129730224609375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974143716074001, "compression_loss": 0.0, "distillation_loss": 0.03830117732286453, "epoch": 5.64, "learning_rate": 4.56867540854691e-06, "loss": 0.0433, "step": 5934, "task_loss": 0.088396355509758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974174843218241, "compression_loss": 0.0, "distillation_loss": 0.0483672171831131, "epoch": 5.64, "learning_rate": 4.562536869405959e-06, "loss": 0.0454, "step": 5935, "task_loss": 0.01907249167561531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974205945370797, "compression_loss": 0.0, "distillation_loss": 0.029438257217407227, "epoch": 5.64, "learning_rate": 4.556402042828611e-06, "loss": 0.0342, "step": 5936, "task_loss": 0.07672730088233948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974237022541706, "compression_loss": 0.0, "distillation_loss": 0.024926815181970596, "epoch": 5.64, "learning_rate": 4.550270929929288e-06, "loss": 0.0329, "step": 5937, "task_loss": 0.10498930513858795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974268074741003, "compression_loss": 0.0, "distillation_loss": 0.10737212002277374, "epoch": 5.64, "learning_rate": 4.54414353182174e-06, "loss": 0.103, "step": 5938, "task_loss": 0.06395427882671356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974299101978728, "compression_loss": 0.0, "distillation_loss": 0.10752628743648529, "epoch": 5.64, "learning_rate": 4.538019849619035e-06, "loss": 0.0984, "step": 5939, "task_loss": 0.01625998131930828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974330104264915, "compression_loss": 0.0, "distillation_loss": 0.033906545490026474, "epoch": 5.64, "learning_rate": 4.531899884433574e-06, "loss": 0.034, "step": 5940, "task_loss": 0.035197898745536804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974361081609602, "compression_loss": 0.0, "distillation_loss": 0.10264087468385696, "epoch": 5.64, "learning_rate": 4.525783637377065e-06, "loss": 0.0959, "step": 5941, "task_loss": 0.03510020300745964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974392034022827, "compression_loss": 0.0, "distillation_loss": 0.021447885781526566, "epoch": 5.64, "learning_rate": 4.519671109560567e-06, "loss": 0.0358, "step": 5942, "task_loss": 0.16513197124004364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974422961514624, "compression_loss": 0.0, "distillation_loss": 0.12002286314964294, "epoch": 5.64, "learning_rate": 4.5135623020944485e-06, "loss": 0.1311, "step": 5943, "task_loss": 0.23128020763397217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974453864095034, "compression_loss": 0.0, "distillation_loss": 0.04369574412703514, "epoch": 5.64, "learning_rate": 4.507457216088396e-06, "loss": 0.0538, "step": 5944, "task_loss": 0.14453351497650146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974484741774089, "compression_loss": 0.0, "distillation_loss": 0.08547208458185196, "epoch": 5.65, "learning_rate": 4.501355852651443e-06, "loss": 0.0862, "step": 5945, "task_loss": 0.09284761548042297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974515594561828, "compression_loss": 0.0, "distillation_loss": 0.053911492228507996, "epoch": 5.65, "learning_rate": 4.495258212891918e-06, "loss": 0.058, "step": 5946, "task_loss": 0.09494040161371231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974546422468289, "compression_loss": 0.0, "distillation_loss": 0.0476272851228714, "epoch": 5.65, "learning_rate": 4.489164297917492e-06, "loss": 0.0436, "step": 5947, "task_loss": 0.00767885148525238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974577225503507, "compression_loss": 0.0, "distillation_loss": 0.035985067486763, "epoch": 5.65, "learning_rate": 4.483074108835145e-06, "loss": 0.057, "step": 5948, "task_loss": 0.24564029276371002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797460800367752, "compression_loss": 0.0, "distillation_loss": 0.09636256098747253, "epoch": 5.65, "learning_rate": 4.476987646751205e-06, "loss": 0.1025, "step": 5949, "task_loss": 0.15778429806232452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974638757000365, "compression_loss": 0.0, "distillation_loss": 0.07263234257698059, "epoch": 5.65, "learning_rate": 4.470904912771298e-06, "loss": 0.0894, "step": 5950, "task_loss": 0.24070273339748383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974669485482078, "compression_loss": 0.0, "distillation_loss": 0.024517526850104332, "epoch": 5.65, "learning_rate": 4.46482590800037e-06, "loss": 0.0281, "step": 5951, "task_loss": 0.05985688790678978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974700189132695, "compression_loss": 0.0, "distillation_loss": 0.06554505228996277, "epoch": 5.65, "learning_rate": 4.458750633542727e-06, "loss": 0.0605, "step": 5952, "task_loss": 0.015469128265976906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974730867962254, "compression_loss": 0.0, "distillation_loss": 0.022991040721535683, "epoch": 5.65, "learning_rate": 4.45267909050196e-06, "loss": 0.0213, "step": 5953, "task_loss": 0.005838258191943169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974761521980792, "compression_loss": 0.0, "distillation_loss": 0.08440842479467392, "epoch": 5.65, "learning_rate": 4.446611279980992e-06, "loss": 0.0993, "step": 5954, "task_loss": 0.23307430744171143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974792151198347, "compression_loss": 0.0, "distillation_loss": 0.04542703554034233, "epoch": 5.66, "learning_rate": 4.440547203082065e-06, "loss": 0.0493, "step": 5955, "task_loss": 0.08394621312618256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974822755624952, "compression_loss": 0.0, "distillation_loss": 0.02426799014210701, "epoch": 5.66, "learning_rate": 4.434486860906761e-06, "loss": 0.0256, "step": 5956, "task_loss": 0.03762784227728844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974853335270646, "compression_loss": 0.0, "distillation_loss": 0.02789018675684929, "epoch": 5.66, "learning_rate": 4.4284302545559624e-06, "loss": 0.0261, "step": 5957, "task_loss": 0.010452285408973694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974883890145468, "compression_loss": 0.0, "distillation_loss": 0.08330793678760529, "epoch": 5.66, "learning_rate": 4.422377385129878e-06, "loss": 0.0919, "step": 5958, "task_loss": 0.1694803088903427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974914420259451, "compression_loss": 0.0, "distillation_loss": 0.055434923619031906, "epoch": 5.66, "learning_rate": 4.416328253728041e-06, "loss": 0.0574, "step": 5959, "task_loss": 0.07511811703443527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974944925622635, "compression_loss": 0.0, "distillation_loss": 0.022852642461657524, "epoch": 5.66, "learning_rate": 4.410282861449317e-06, "loss": 0.0288, "step": 5960, "task_loss": 0.08201512694358826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7974975406245054, "compression_loss": 0.0, "distillation_loss": 0.023079611361026764, "epoch": 5.66, "learning_rate": 4.404241209391874e-06, "loss": 0.0443, "step": 5961, "task_loss": 0.23562346398830414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975005862136747, "compression_loss": 0.0, "distillation_loss": 0.03505755960941315, "epoch": 5.66, "learning_rate": 4.398203298653195e-06, "loss": 0.0437, "step": 5962, "task_loss": 0.12132131308317184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797503629330775, "compression_loss": 0.0, "distillation_loss": 0.036127813160419464, "epoch": 5.66, "learning_rate": 4.392169130330115e-06, "loss": 0.0389, "step": 5963, "task_loss": 0.06360867619514465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79750666997681, "compression_loss": 0.0, "distillation_loss": 0.03919731080532074, "epoch": 5.66, "learning_rate": 4.386138705518761e-06, "loss": 0.0428, "step": 5964, "task_loss": 0.07522215694189072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975097081527833, "compression_loss": 0.0, "distillation_loss": 0.024520831182599068, "epoch": 5.66, "learning_rate": 4.380112025314581e-06, "loss": 0.0316, "step": 5965, "task_loss": 0.09532146900892258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975127438596987, "compression_loss": 0.0, "distillation_loss": 0.04087524488568306, "epoch": 5.67, "learning_rate": 4.374089090812367e-06, "loss": 0.0448, "step": 5966, "task_loss": 0.0800790786743164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975157770985598, "compression_loss": 0.0, "distillation_loss": 0.018016282469034195, "epoch": 5.67, "learning_rate": 4.368069903106203e-06, "loss": 0.0224, "step": 5967, "task_loss": 0.06212467700242996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975188078703704, "compression_loss": 0.0, "distillation_loss": 0.02099071443080902, "epoch": 5.67, "learning_rate": 4.3620544632894996e-06, "loss": 0.0255, "step": 5968, "task_loss": 0.0659455806016922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797521836176134, "compression_loss": 0.0, "distillation_loss": 0.03363244980573654, "epoch": 5.67, "learning_rate": 4.3560427724549965e-06, "loss": 0.0392, "step": 5969, "task_loss": 0.08905819803476334 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975248620168545, "compression_loss": 0.0, "distillation_loss": 0.05440010130405426, "epoch": 5.67, "learning_rate": 4.350034831694752e-06, "loss": 0.0709, "step": 5970, "task_loss": 0.21891024708747864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975278853935354, "compression_loss": 0.0, "distillation_loss": 0.0742577612400055, "epoch": 5.67, "learning_rate": 4.344030642100133e-06, "loss": 0.0727, "step": 5971, "task_loss": 0.05856480449438095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975309063071804, "compression_loss": 0.0, "distillation_loss": 0.17016083002090454, "epoch": 5.67, "learning_rate": 4.33803020476182e-06, "loss": 0.1682, "step": 5972, "task_loss": 0.15005451440811157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975339247587933, "compression_loss": 0.0, "distillation_loss": 0.03252163529396057, "epoch": 5.67, "learning_rate": 4.3320335207698376e-06, "loss": 0.03, "step": 5973, "task_loss": 0.007399743422865868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975369407493776, "compression_loss": 0.0, "distillation_loss": 0.022539321333169937, "epoch": 5.67, "learning_rate": 4.326040591213501e-06, "loss": 0.0209, "step": 5974, "task_loss": 0.006453389301896095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975399542799372, "compression_loss": 0.0, "distillation_loss": 0.15156593918800354, "epoch": 5.67, "learning_rate": 4.320051417181453e-06, "loss": 0.1463, "step": 5975, "task_loss": 0.09853891283273697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975429653514756, "compression_loss": 0.0, "distillation_loss": 0.04325627535581589, "epoch": 5.68, "learning_rate": 4.314065999761668e-06, "loss": 0.0558, "step": 5976, "task_loss": 0.1683274209499359 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975459739649966, "compression_loss": 0.0, "distillation_loss": 0.019812000915408134, "epoch": 5.68, "learning_rate": 4.308084340041413e-06, "loss": 0.0182, "step": 5977, "task_loss": 0.003462914377450943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975489801215038, "compression_loss": 0.0, "distillation_loss": 0.12688830494880676, "epoch": 5.68, "learning_rate": 4.302106439107298e-06, "loss": 0.1374, "step": 5978, "task_loss": 0.23153865337371826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975519838220009, "compression_loss": 0.0, "distillation_loss": 0.0461309477686882, "epoch": 5.68, "learning_rate": 4.296132298045222e-06, "loss": 0.0423, "step": 5979, "task_loss": 0.007924774661660194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975549850674917, "compression_loss": 0.0, "distillation_loss": 0.08937639743089676, "epoch": 5.68, "learning_rate": 4.29016191794043e-06, "loss": 0.0852, "step": 5980, "task_loss": 0.04732952266931534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975579838589797, "compression_loss": 0.0, "distillation_loss": 0.017793424427509308, "epoch": 5.68, "learning_rate": 4.284195299877469e-06, "loss": 0.0164, "step": 5981, "task_loss": 0.003985332325100899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975609801974687, "compression_loss": 0.0, "distillation_loss": 0.04416952282190323, "epoch": 5.68, "learning_rate": 4.278232444940192e-06, "loss": 0.041, "step": 5982, "task_loss": 0.01279473677277565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975639740839623, "compression_loss": 0.0, "distillation_loss": 0.10886112600564957, "epoch": 5.68, "learning_rate": 4.272273354211795e-06, "loss": 0.1035, "step": 5983, "task_loss": 0.05517327040433884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975669655194643, "compression_loss": 0.0, "distillation_loss": 0.02088319882750511, "epoch": 5.68, "learning_rate": 4.266318028774768e-06, "loss": 0.0191, "step": 5984, "task_loss": 0.00327393040060997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975699545049783, "compression_loss": 0.0, "distillation_loss": 0.038873784244060516, "epoch": 5.68, "learning_rate": 4.260366469710919e-06, "loss": 0.0364, "step": 5985, "task_loss": 0.013767654076218605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975729410415079, "compression_loss": 0.0, "distillation_loss": 0.014923624694347382, "epoch": 5.68, "learning_rate": 4.254418678101385e-06, "loss": 0.0138, "step": 5986, "task_loss": 0.0034323427826166153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975759251300569, "compression_loss": 0.0, "distillation_loss": 0.011618406511843204, "epoch": 5.69, "learning_rate": 4.248474655026618e-06, "loss": 0.011, "step": 5987, "task_loss": 0.005057178437709808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797578906771629, "compression_loss": 0.0, "distillation_loss": 0.03404705598950386, "epoch": 5.69, "learning_rate": 4.242534401566367e-06, "loss": 0.052, "step": 5988, "task_loss": 0.2138359099626541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975818859672279, "compression_loss": 0.0, "distillation_loss": 0.02775607816874981, "epoch": 5.69, "learning_rate": 4.236597918799709e-06, "loss": 0.0253, "step": 5989, "task_loss": 0.0035394616425037384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975848627178571, "compression_loss": 0.0, "distillation_loss": 0.029923155903816223, "epoch": 5.69, "learning_rate": 4.23066520780504e-06, "loss": 0.0275, "step": 5990, "task_loss": 0.005590047687292099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975878370245204, "compression_loss": 0.0, "distillation_loss": 0.03592822700738907, "epoch": 5.69, "learning_rate": 4.224736269660062e-06, "loss": 0.0419, "step": 5991, "task_loss": 0.09543702751398087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975908088882216, "compression_loss": 0.0, "distillation_loss": 0.040161147713661194, "epoch": 5.69, "learning_rate": 4.218811105441789e-06, "loss": 0.0431, "step": 5992, "task_loss": 0.06955252587795258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975937783099643, "compression_loss": 0.0, "distillation_loss": 0.013323452323675156, "epoch": 5.69, "learning_rate": 4.21288971622657e-06, "loss": 0.0177, "step": 5993, "task_loss": 0.056832775473594666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797596745290752, "compression_loss": 0.0, "distillation_loss": 0.05237278342247009, "epoch": 5.69, "learning_rate": 4.206972103090037e-06, "loss": 0.0474, "step": 5994, "task_loss": 0.0029510650783777237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7975997098315886, "compression_loss": 0.0, "distillation_loss": 0.09177492558956146, "epoch": 5.69, "learning_rate": 4.201058267107169e-06, "loss": 0.0933, "step": 5995, "task_loss": 0.10701367259025574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976026719334777, "compression_loss": 0.0, "distillation_loss": 0.034797269850969315, "epoch": 5.69, "learning_rate": 4.195148209352232e-06, "loss": 0.0439, "step": 5996, "task_loss": 0.12537652254104614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976056315974229, "compression_loss": 0.0, "distillation_loss": 0.010959111154079437, "epoch": 5.7, "learning_rate": 4.189241930898824e-06, "loss": 0.0103, "step": 5997, "task_loss": 0.00464300811290741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976085888244282, "compression_loss": 0.0, "distillation_loss": 0.027968231588602066, "epoch": 5.7, "learning_rate": 4.183339432819844e-06, "loss": 0.0294, "step": 5998, "task_loss": 0.04234401881694794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976115436154969, "compression_loss": 0.0, "distillation_loss": 0.06177544593811035, "epoch": 5.7, "learning_rate": 4.177440716187506e-06, "loss": 0.058, "step": 5999, "task_loss": 0.024354927241802216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976144959716329, "compression_loss": 0.0, "distillation_loss": 0.024940505623817444, "epoch": 5.7, "learning_rate": 4.17154578207335e-06, "loss": 0.0233, "step": 6000, "task_loss": 0.008159097284078598 }, { "epoch": 5.7, "eval_accuracy": 0.8830275229357798, "eval_loss": 0.45090019702911377, "eval_runtime": 18.0915, "eval_samples_per_second": 48.199, "eval_steps_per_second": 6.025, "step": 6000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976174458938398, "compression_loss": 0.0, "distillation_loss": 0.0402870811522007, "epoch": 5.7, "learning_rate": 4.165654631548213e-06, "loss": 0.0397, "step": 6001, "task_loss": 0.03429171442985535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976203933831213, "compression_loss": 0.0, "distillation_loss": 0.01571035385131836, "epoch": 5.7, "learning_rate": 4.159767265682243e-06, "loss": 0.0147, "step": 6002, "task_loss": 0.0054051876068115234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976233384404812, "compression_loss": 0.0, "distillation_loss": 0.033142976462841034, "epoch": 5.7, "learning_rate": 4.153883685544921e-06, "loss": 0.0392, "step": 6003, "task_loss": 0.09410811960697174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976262810669229, "compression_loss": 0.0, "distillation_loss": 0.03702503442764282, "epoch": 5.7, "learning_rate": 4.1480038922050295e-06, "loss": 0.0537, "step": 6004, "task_loss": 0.20370852947235107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976292212634503, "compression_loss": 0.0, "distillation_loss": 0.03896544128656387, "epoch": 5.7, "learning_rate": 4.142127886730654e-06, "loss": 0.0421, "step": 6005, "task_loss": 0.0707533061504364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976321590310671, "compression_loss": 0.0, "distillation_loss": 0.02081841602921486, "epoch": 5.7, "learning_rate": 4.136255670189193e-06, "loss": 0.0193, "step": 6006, "task_loss": 0.005161428824067116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976350943707768, "compression_loss": 0.0, "distillation_loss": 0.12577922642230988, "epoch": 5.7, "learning_rate": 4.130387243647377e-06, "loss": 0.1272, "step": 6007, "task_loss": 0.1395563781261444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976380272835834, "compression_loss": 0.0, "distillation_loss": 0.019099004566669464, "epoch": 5.71, "learning_rate": 4.12452260817123e-06, "loss": 0.0204, "step": 6008, "task_loss": 0.03233078494668007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976409577704902, "compression_loss": 0.0, "distillation_loss": 0.04859714210033417, "epoch": 5.71, "learning_rate": 4.118661764826079e-06, "loss": 0.051, "step": 6009, "task_loss": 0.07280280441045761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976438858325011, "compression_loss": 0.0, "distillation_loss": 0.018262770026922226, "epoch": 5.71, "learning_rate": 4.112804714676594e-06, "loss": 0.0169, "step": 6010, "task_loss": 0.004157818853855133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976468114706198, "compression_loss": 0.0, "distillation_loss": 0.047912146896123886, "epoch": 5.71, "learning_rate": 4.106951458786717e-06, "loss": 0.0471, "step": 6011, "task_loss": 0.04020364210009575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976497346858499, "compression_loss": 0.0, "distillation_loss": 0.02587919309735298, "epoch": 5.71, "learning_rate": 4.101101998219737e-06, "loss": 0.0394, "step": 6012, "task_loss": 0.1613081842660904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976526554791951, "compression_loss": 0.0, "distillation_loss": 0.025433223694562912, "epoch": 5.71, "learning_rate": 4.095256334038222e-06, "loss": 0.024, "step": 6013, "task_loss": 0.010708844289183617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976555738516591, "compression_loss": 0.0, "distillation_loss": 0.025807831436395645, "epoch": 5.71, "learning_rate": 4.089414467304078e-06, "loss": 0.028, "step": 6014, "task_loss": 0.0474732369184494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976584898042456, "compression_loss": 0.0, "distillation_loss": 0.022514909505844116, "epoch": 5.71, "learning_rate": 4.083576399078504e-06, "loss": 0.0324, "step": 6015, "task_loss": 0.12099795788526535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976614033379582, "compression_loss": 0.0, "distillation_loss": 0.013821316882967949, "epoch": 5.71, "learning_rate": 4.077742130422005e-06, "loss": 0.0127, "step": 6016, "task_loss": 0.002670343965291977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976643144538006, "compression_loss": 0.0, "distillation_loss": 0.0412713922560215, "epoch": 5.71, "learning_rate": 4.071911662394418e-06, "loss": 0.0377, "step": 6017, "task_loss": 0.005630454048514366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976672231527766, "compression_loss": 0.0, "distillation_loss": 0.07258160412311554, "epoch": 5.72, "learning_rate": 4.066084996054867e-06, "loss": 0.0764, "step": 6018, "task_loss": 0.11061573028564453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976701294358898, "compression_loss": 0.0, "distillation_loss": 0.04702713340520859, "epoch": 5.72, "learning_rate": 4.060262132461795e-06, "loss": 0.0465, "step": 6019, "task_loss": 0.041665218770504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976730333041439, "compression_loss": 0.0, "distillation_loss": 0.04431241750717163, "epoch": 5.72, "learning_rate": 4.054443072672942e-06, "loss": 0.042, "step": 6020, "task_loss": 0.020761430263519287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976759347585425, "compression_loss": 0.0, "distillation_loss": 0.02348828688263893, "epoch": 5.72, "learning_rate": 4.048627817745393e-06, "loss": 0.0219, "step": 6021, "task_loss": 0.007251596078276634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976788338000894, "compression_loss": 0.0, "distillation_loss": 0.015367396175861359, "epoch": 5.72, "learning_rate": 4.042816368735502e-06, "loss": 0.0143, "step": 6022, "task_loss": 0.004556819796562195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976817304297882, "compression_loss": 0.0, "distillation_loss": 0.08127819746732712, "epoch": 5.72, "learning_rate": 4.037008726698943e-06, "loss": 0.0863, "step": 6023, "task_loss": 0.13124635815620422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976846246486426, "compression_loss": 0.0, "distillation_loss": 0.010580083355307579, "epoch": 5.72, "learning_rate": 4.0312048926907156e-06, "loss": 0.0099, "step": 6024, "task_loss": 0.0037852171808481216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976875164576563, "compression_loss": 0.0, "distillation_loss": 0.036560431122779846, "epoch": 5.72, "learning_rate": 4.025404867765103e-06, "loss": 0.0468, "step": 6025, "task_loss": 0.13939811289310455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797690405857833, "compression_loss": 0.0, "distillation_loss": 0.05065472796559334, "epoch": 5.72, "learning_rate": 4.019608652975712e-06, "loss": 0.0525, "step": 6026, "task_loss": 0.06941260397434235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976932928501764, "compression_loss": 0.0, "distillation_loss": 0.033366162329912186, "epoch": 5.72, "learning_rate": 4.013816249375446e-06, "loss": 0.0437, "step": 6027, "task_loss": 0.13670209050178528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976961774356901, "compression_loss": 0.0, "distillation_loss": 0.07585856318473816, "epoch": 5.72, "learning_rate": 4.008027658016536e-06, "loss": 0.0709, "step": 6028, "task_loss": 0.026351409032940865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7976990596153778, "compression_loss": 0.0, "distillation_loss": 0.15275973081588745, "epoch": 5.73, "learning_rate": 4.0022428799504936e-06, "loss": 0.1562, "step": 6029, "task_loss": 0.18706992268562317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977019393902433, "compression_loss": 0.0, "distillation_loss": 0.014029610902071, "epoch": 5.73, "learning_rate": 3.996461916228159e-06, "loss": 0.02, "step": 6030, "task_loss": 0.07381336390972137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79770481676129, "compression_loss": 0.0, "distillation_loss": 0.014353383332490921, "epoch": 5.73, "learning_rate": 3.990684767899677e-06, "loss": 0.0231, "step": 6031, "task_loss": 0.10191542655229568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977076917295219, "compression_loss": 0.0, "distillation_loss": 0.12925012409687042, "epoch": 5.73, "learning_rate": 3.984911436014488e-06, "loss": 0.1234, "step": 6032, "task_loss": 0.07045982778072357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977105642959426, "compression_loss": 0.0, "distillation_loss": 0.021065477281808853, "epoch": 5.73, "learning_rate": 3.979141921621346e-06, "loss": 0.0263, "step": 6033, "task_loss": 0.07347995042800903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977134344615556, "compression_loss": 0.0, "distillation_loss": 0.03473827242851257, "epoch": 5.73, "learning_rate": 3.9733762257683076e-06, "loss": 0.0325, "step": 6034, "task_loss": 0.012779684737324715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977163022273648, "compression_loss": 0.0, "distillation_loss": 0.01884627155959606, "epoch": 5.73, "learning_rate": 3.967614349502749e-06, "loss": 0.0172, "step": 6035, "task_loss": 0.002278236672282219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977191675943738, "compression_loss": 0.0, "distillation_loss": 0.021827183663845062, "epoch": 5.73, "learning_rate": 3.961856293871336e-06, "loss": 0.0203, "step": 6036, "task_loss": 0.0063978079706430435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977220305635863, "compression_loss": 0.0, "distillation_loss": 0.0746820718050003, "epoch": 5.73, "learning_rate": 3.956102059920036e-06, "loss": 0.0857, "step": 6037, "task_loss": 0.1851119101047516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977248911360059, "compression_loss": 0.0, "distillation_loss": 0.04180888831615448, "epoch": 5.73, "learning_rate": 3.9503516486941565e-06, "loss": 0.039, "step": 6038, "task_loss": 0.014088783413171768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977277493126363, "compression_loss": 0.0, "distillation_loss": 0.02447015978395939, "epoch": 5.74, "learning_rate": 3.944605061238277e-06, "loss": 0.03, "step": 6039, "task_loss": 0.07963259518146515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977306050944813, "compression_loss": 0.0, "distillation_loss": 0.023615635931491852, "epoch": 5.74, "learning_rate": 3.938862298596291e-06, "loss": 0.0276, "step": 6040, "task_loss": 0.06358890980482101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977334584825445, "compression_loss": 0.0, "distillation_loss": 0.03558419644832611, "epoch": 5.74, "learning_rate": 3.9331233618113974e-06, "loss": 0.0409, "step": 6041, "task_loss": 0.0888403058052063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977363094778297, "compression_loss": 0.0, "distillation_loss": 0.016286537051200867, "epoch": 5.74, "learning_rate": 3.9273882519261066e-06, "loss": 0.0152, "step": 6042, "task_loss": 0.005049385130405426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977391580813403, "compression_loss": 0.0, "distillation_loss": 0.03099142014980316, "epoch": 5.74, "learning_rate": 3.921656969982229e-06, "loss": 0.04, "step": 6043, "task_loss": 0.12095896154642105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977420042940803, "compression_loss": 0.0, "distillation_loss": 0.09785278886556625, "epoch": 5.74, "learning_rate": 3.9159295170208695e-06, "loss": 0.1007, "step": 6044, "task_loss": 0.126520037651062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797744848117053, "compression_loss": 0.0, "distillation_loss": 0.015856163576245308, "epoch": 5.74, "learning_rate": 3.910205894082461e-06, "loss": 0.0301, "step": 6045, "task_loss": 0.15833263099193573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977476895512625, "compression_loss": 0.0, "distillation_loss": 0.06277845799922943, "epoch": 5.74, "learning_rate": 3.904486102206717e-06, "loss": 0.0659, "step": 6046, "task_loss": 0.09369910508394241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977505285977123, "compression_loss": 0.0, "distillation_loss": 0.05015091598033905, "epoch": 5.74, "learning_rate": 3.898770142432676e-06, "loss": 0.0622, "step": 6047, "task_loss": 0.1709066480398178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797753365257406, "compression_loss": 0.0, "distillation_loss": 0.02492024190723896, "epoch": 5.74, "learning_rate": 3.893058015798656e-06, "loss": 0.0323, "step": 6048, "task_loss": 0.09882645308971405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977561995313475, "compression_loss": 0.0, "distillation_loss": 0.017794858664274216, "epoch": 5.74, "learning_rate": 3.887349723342304e-06, "loss": 0.0305, "step": 6049, "task_loss": 0.14453651010990143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977590314205403, "compression_loss": 0.0, "distillation_loss": 0.023934083059430122, "epoch": 5.75, "learning_rate": 3.881645266100556e-06, "loss": 0.0297, "step": 6050, "task_loss": 0.08163676410913467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977618609259881, "compression_loss": 0.0, "distillation_loss": 0.04733239859342575, "epoch": 5.75, "learning_rate": 3.875944645109647e-06, "loss": 0.0493, "step": 6051, "task_loss": 0.06739036738872528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977646880486946, "compression_loss": 0.0, "distillation_loss": 0.07677607238292694, "epoch": 5.75, "learning_rate": 3.8702478614051355e-06, "loss": 0.0831, "step": 6052, "task_loss": 0.14044952392578125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977675127896635, "compression_loss": 0.0, "distillation_loss": 0.08341547846794128, "epoch": 5.75, "learning_rate": 3.864554916021859e-06, "loss": 0.08, "step": 6053, "task_loss": 0.04903567209839821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977703351498985, "compression_loss": 0.0, "distillation_loss": 0.04993259161710739, "epoch": 5.75, "learning_rate": 3.8588658099939665e-06, "loss": 0.0565, "step": 6054, "task_loss": 0.11562397330999374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977731551304031, "compression_loss": 0.0, "distillation_loss": 0.019889146089553833, "epoch": 5.75, "learning_rate": 3.853180544354915e-06, "loss": 0.0268, "step": 6055, "task_loss": 0.0885215774178505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977759727321814, "compression_loss": 0.0, "distillation_loss": 0.028516318649053574, "epoch": 5.75, "learning_rate": 3.847499120137471e-06, "loss": 0.0281, "step": 6056, "task_loss": 0.024541892111301422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977787879562367, "compression_loss": 0.0, "distillation_loss": 0.01963622495532036, "epoch": 5.75, "learning_rate": 3.84182153837368e-06, "loss": 0.0326, "step": 6057, "task_loss": 0.1491500735282898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977816008035727, "compression_loss": 0.0, "distillation_loss": 0.04049837589263916, "epoch": 5.75, "learning_rate": 3.836147800094903e-06, "loss": 0.0401, "step": 6058, "task_loss": 0.036487508565187454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977844112751933, "compression_loss": 0.0, "distillation_loss": 0.059603024274110794, "epoch": 5.75, "learning_rate": 3.830477906331806e-06, "loss": 0.0588, "step": 6059, "task_loss": 0.05121609568595886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977872193721021, "compression_loss": 0.0, "distillation_loss": 0.03637555241584778, "epoch": 5.75, "learning_rate": 3.824811858114355e-06, "loss": 0.0334, "step": 6060, "task_loss": 0.006767544895410538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977900250953027, "compression_loss": 0.0, "distillation_loss": 0.023839876055717468, "epoch": 5.76, "learning_rate": 3.819149656471802e-06, "loss": 0.0227, "step": 6061, "task_loss": 0.012284144759178162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977928284457988, "compression_loss": 0.0, "distillation_loss": 0.012317647226154804, "epoch": 5.76, "learning_rate": 3.8134913024327284e-06, "loss": 0.0171, "step": 6062, "task_loss": 0.0602097250521183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977956294245941, "compression_loss": 0.0, "distillation_loss": 0.04070582613348961, "epoch": 5.76, "learning_rate": 3.8078367970249885e-06, "loss": 0.0551, "step": 6063, "task_loss": 0.18498341739177704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7977984280326924, "compression_loss": 0.0, "distillation_loss": 0.06427805125713348, "epoch": 5.76, "learning_rate": 3.8021861412757654e-06, "loss": 0.069, "step": 6064, "task_loss": 0.11124201118946075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978012242710972, "compression_loss": 0.0, "distillation_loss": 0.13212275505065918, "epoch": 5.76, "learning_rate": 3.796539336211513e-06, "loss": 0.1268, "step": 6065, "task_loss": 0.07930596172809601 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978040181408124, "compression_loss": 0.0, "distillation_loss": 0.0323861762881279, "epoch": 5.76, "learning_rate": 3.790896382858011e-06, "loss": 0.0336, "step": 6066, "task_loss": 0.04439293220639229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978068096428413, "compression_loss": 0.0, "distillation_loss": 0.05183859169483185, "epoch": 5.76, "learning_rate": 3.785257282240326e-06, "loss": 0.0565, "step": 6067, "task_loss": 0.09798099845647812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797809598778188, "compression_loss": 0.0, "distillation_loss": 0.02488609589636326, "epoch": 5.76, "learning_rate": 3.779622035382821e-06, "loss": 0.0376, "step": 6068, "task_loss": 0.15173770487308502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978123855478559, "compression_loss": 0.0, "distillation_loss": 0.013482201844453812, "epoch": 5.76, "learning_rate": 3.7739906433091804e-06, "loss": 0.0253, "step": 6069, "task_loss": 0.1319684386253357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797815169952849, "compression_loss": 0.0, "distillation_loss": 0.013858940452337265, "epoch": 5.76, "learning_rate": 3.768363107042361e-06, "loss": 0.0128, "step": 6070, "task_loss": 0.0029547102749347687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978179519941705, "compression_loss": 0.0, "distillation_loss": 0.030178038403391838, "epoch": 5.77, "learning_rate": 3.7627394276046337e-06, "loss": 0.0281, "step": 6071, "task_loss": 0.009721649810671806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978207316728245, "compression_loss": 0.0, "distillation_loss": 0.036198072135448456, "epoch": 5.77, "learning_rate": 3.7571196060175672e-06, "loss": 0.0389, "step": 6072, "task_loss": 0.06324063241481781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978235089898145, "compression_loss": 0.0, "distillation_loss": 0.03386306017637253, "epoch": 5.77, "learning_rate": 3.7515036433020343e-06, "loss": 0.0425, "step": 6073, "task_loss": 0.12017367035150528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978262839461442, "compression_loss": 0.0, "distillation_loss": 0.03635266050696373, "epoch": 5.77, "learning_rate": 3.7458915404782024e-06, "loss": 0.0361, "step": 6074, "task_loss": 0.034307606518268585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978290565428173, "compression_loss": 0.0, "distillation_loss": 0.04068465903401375, "epoch": 5.77, "learning_rate": 3.740283298565525e-06, "loss": 0.0485, "step": 6075, "task_loss": 0.11905878782272339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978318267808375, "compression_loss": 0.0, "distillation_loss": 0.10536225140094757, "epoch": 5.77, "learning_rate": 3.734678918582782e-06, "loss": 0.1052, "step": 6076, "task_loss": 0.10353413224220276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978345946612085, "compression_loss": 0.0, "distillation_loss": 0.05913068726658821, "epoch": 5.77, "learning_rate": 3.729078401548028e-06, "loss": 0.0667, "step": 6077, "task_loss": 0.1346680372953415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978373601849339, "compression_loss": 0.0, "distillation_loss": 0.029158534482121468, "epoch": 5.77, "learning_rate": 3.723481748478619e-06, "loss": 0.038, "step": 6078, "task_loss": 0.11800020188093185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978401233530175, "compression_loss": 0.0, "distillation_loss": 0.023293327540159225, "epoch": 5.77, "learning_rate": 3.717888960391222e-06, "loss": 0.0218, "step": 6079, "task_loss": 0.008854774758219719 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978428841664628, "compression_loss": 0.0, "distillation_loss": 0.06899020075798035, "epoch": 5.77, "learning_rate": 3.7123000383017944e-06, "loss": 0.063, "step": 6080, "task_loss": 0.009547203779220581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978456426262737, "compression_loss": 0.0, "distillation_loss": 0.027045968919992447, "epoch": 5.77, "learning_rate": 3.7067149832255787e-06, "loss": 0.0329, "step": 6081, "task_loss": 0.08595190942287445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978483987334537, "compression_loss": 0.0, "distillation_loss": 0.18197451531887054, "epoch": 5.78, "learning_rate": 3.7011337961771394e-06, "loss": 0.1829, "step": 6082, "task_loss": 0.19158345460891724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978511524890065, "compression_loss": 0.0, "distillation_loss": 0.104429692029953, "epoch": 5.78, "learning_rate": 3.6955564781703257e-06, "loss": 0.1017, "step": 6083, "task_loss": 0.07724487781524658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797853903893936, "compression_loss": 0.0, "distillation_loss": 0.05032842978835106, "epoch": 5.78, "learning_rate": 3.6899830302182832e-06, "loss": 0.0553, "step": 6084, "task_loss": 0.1004081666469574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978566529492456, "compression_loss": 0.0, "distillation_loss": 0.02652582712471485, "epoch": 5.78, "learning_rate": 3.6844134533334437e-06, "loss": 0.0353, "step": 6085, "task_loss": 0.11424309015274048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978593996559391, "compression_loss": 0.0, "distillation_loss": 0.03817993402481079, "epoch": 5.78, "learning_rate": 3.6788477485275667e-06, "loss": 0.0446, "step": 6086, "task_loss": 0.1024344339966774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978621440150202, "compression_loss": 0.0, "distillation_loss": 0.04021666944026947, "epoch": 5.78, "learning_rate": 3.673285916811678e-06, "loss": 0.0425, "step": 6087, "task_loss": 0.06331319361925125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978648860274926, "compression_loss": 0.0, "distillation_loss": 0.021305495873093605, "epoch": 5.78, "learning_rate": 3.66772795919611e-06, "loss": 0.0294, "step": 6088, "task_loss": 0.10244767367839813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79786762569436, "compression_loss": 0.0, "distillation_loss": 0.05766810476779938, "epoch": 5.78, "learning_rate": 3.6621738766904944e-06, "loss": 0.0829, "step": 6089, "task_loss": 0.3096805214881897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797870363016626, "compression_loss": 0.0, "distillation_loss": 0.06154272332787514, "epoch": 5.78, "learning_rate": 3.6566236703037633e-06, "loss": 0.0717, "step": 6090, "task_loss": 0.1632338911294937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978730979952942, "compression_loss": 0.0, "distillation_loss": 0.11370549350976944, "epoch": 5.78, "learning_rate": 3.6510773410441316e-06, "loss": 0.1306, "step": 6091, "task_loss": 0.2824677526950836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978758306313685, "compression_loss": 0.0, "distillation_loss": 0.030081721022725105, "epoch": 5.79, "learning_rate": 3.6455348899191137e-06, "loss": 0.0389, "step": 6092, "task_loss": 0.11794061213731766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978785609258524, "compression_loss": 0.0, "distillation_loss": 0.01757514476776123, "epoch": 5.79, "learning_rate": 3.6399963179355347e-06, "loss": 0.0166, "step": 6093, "task_loss": 0.007390303537249565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978812888797497, "compression_loss": 0.0, "distillation_loss": 0.019966866821050644, "epoch": 5.79, "learning_rate": 3.6344616260994946e-06, "loss": 0.0241, "step": 6094, "task_loss": 0.06160247325897217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978840144940641, "compression_loss": 0.0, "distillation_loss": 0.03950147703289986, "epoch": 5.79, "learning_rate": 3.6289308154163927e-06, "loss": 0.0375, "step": 6095, "task_loss": 0.019919494166970253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978867377697991, "compression_loss": 0.0, "distillation_loss": 0.02330363541841507, "epoch": 5.79, "learning_rate": 3.6234038868909357e-06, "loss": 0.0384, "step": 6096, "task_loss": 0.17430183291435242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978894587079586, "compression_loss": 0.0, "distillation_loss": 0.020831573754549026, "epoch": 5.79, "learning_rate": 3.6178808415271158e-06, "loss": 0.0274, "step": 6097, "task_loss": 0.08686920255422592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978921773095462, "compression_loss": 0.0, "distillation_loss": 0.06831928342580795, "epoch": 5.79, "learning_rate": 3.6123616803282132e-06, "loss": 0.0654, "step": 6098, "task_loss": 0.03933015465736389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978948935755655, "compression_loss": 0.0, "distillation_loss": 0.033079955726861954, "epoch": 5.79, "learning_rate": 3.6068464042968123e-06, "loss": 0.0432, "step": 6099, "task_loss": 0.1338474154472351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7978976075070203, "compression_loss": 0.0, "distillation_loss": 0.01981043629348278, "epoch": 5.79, "learning_rate": 3.6013350144348005e-06, "loss": 0.0185, "step": 6100, "task_loss": 0.00669914111495018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979003191049142, "compression_loss": 0.0, "distillation_loss": 0.05477694794535637, "epoch": 5.79, "learning_rate": 3.595827511743341e-06, "loss": 0.0621, "step": 6101, "task_loss": 0.12840932607650757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797903028370251, "compression_loss": 0.0, "distillation_loss": 0.04909781366586685, "epoch": 5.79, "learning_rate": 3.5903238972228916e-06, "loss": 0.0456, "step": 6102, "task_loss": 0.014244996011257172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979057353040342, "compression_loss": 0.0, "distillation_loss": 0.015033694915473461, "epoch": 5.8, "learning_rate": 3.584824171873219e-06, "loss": 0.014, "step": 6103, "task_loss": 0.004645902663469315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979084399072676, "compression_loss": 0.0, "distillation_loss": 0.03330419957637787, "epoch": 5.8, "learning_rate": 3.579328336693377e-06, "loss": 0.0474, "step": 6104, "task_loss": 0.17442171275615692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979111421809548, "compression_loss": 0.0, "distillation_loss": 0.03729955852031708, "epoch": 5.8, "learning_rate": 3.5738363926816954e-06, "loss": 0.0373, "step": 6105, "task_loss": 0.03731003403663635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979138421260996, "compression_loss": 0.0, "distillation_loss": 0.028155038133263588, "epoch": 5.8, "learning_rate": 3.5683483408358307e-06, "loss": 0.0345, "step": 6106, "task_loss": 0.09175101667642593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979165397437057, "compression_loss": 0.0, "distillation_loss": 0.02423456311225891, "epoch": 5.8, "learning_rate": 3.5628641821527002e-06, "loss": 0.0265, "step": 6107, "task_loss": 0.04681221395730972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979192350347767, "compression_loss": 0.0, "distillation_loss": 0.025493551045656204, "epoch": 5.8, "learning_rate": 3.55738391762854e-06, "loss": 0.0236, "step": 6108, "task_loss": 0.00649440661072731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979219280003162, "compression_loss": 0.0, "distillation_loss": 0.0265052393078804, "epoch": 5.8, "learning_rate": 3.551907548258854e-06, "loss": 0.0333, "step": 6109, "task_loss": 0.09423819929361343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797924618641328, "compression_loss": 0.0, "distillation_loss": 0.019546575844287872, "epoch": 5.8, "learning_rate": 3.5464350750384594e-06, "loss": 0.0277, "step": 6110, "task_loss": 0.10154897719621658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979273069588158, "compression_loss": 0.0, "distillation_loss": 0.06870542466640472, "epoch": 5.8, "learning_rate": 3.5409664989614593e-06, "loss": 0.0715, "step": 6111, "task_loss": 0.09712858498096466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979299929537832, "compression_loss": 0.0, "distillation_loss": 0.019337208941578865, "epoch": 5.8, "learning_rate": 3.5355018210212394e-06, "loss": 0.0179, "step": 6112, "task_loss": 0.004613950848579407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979326766272339, "compression_loss": 0.0, "distillation_loss": 0.020329779013991356, "epoch": 5.81, "learning_rate": 3.530041042210483e-06, "loss": 0.0191, "step": 6113, "task_loss": 0.008153880015015602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979353579801717, "compression_loss": 0.0, "distillation_loss": 0.06501001864671707, "epoch": 5.81, "learning_rate": 3.524584163521177e-06, "loss": 0.0633, "step": 6114, "task_loss": 0.048086702823638916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979380370136001, "compression_loss": 0.0, "distillation_loss": 0.026702333241701126, "epoch": 5.81, "learning_rate": 3.5191311859445796e-06, "loss": 0.0326, "step": 6115, "task_loss": 0.08566763997077942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979407137285228, "compression_loss": 0.0, "distillation_loss": 0.07300684601068497, "epoch": 5.81, "learning_rate": 3.5136821104712557e-06, "loss": 0.0717, "step": 6116, "task_loss": 0.059550218284130096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979433881259437, "compression_loss": 0.0, "distillation_loss": 0.023176930844783783, "epoch": 5.81, "learning_rate": 3.5082369380910612e-06, "loss": 0.0228, "step": 6117, "task_loss": 0.01990087516605854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979460602068662, "compression_loss": 0.0, "distillation_loss": 0.01764390803873539, "epoch": 5.81, "learning_rate": 3.5027956697931324e-06, "loss": 0.0205, "step": 6118, "task_loss": 0.04635511338710785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979487299722942, "compression_loss": 0.0, "distillation_loss": 0.049246713519096375, "epoch": 5.81, "learning_rate": 3.497358306565901e-06, "loss": 0.055, "step": 6119, "task_loss": 0.1070106029510498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979513974232312, "compression_loss": 0.0, "distillation_loss": 0.024939920753240585, "epoch": 5.81, "learning_rate": 3.491924849397085e-06, "loss": 0.0399, "step": 6120, "task_loss": 0.1745125949382782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797954062560681, "compression_loss": 0.0, "distillation_loss": 0.13209006190299988, "epoch": 5.81, "learning_rate": 3.486495299273712e-06, "loss": 0.1453, "step": 6121, "task_loss": 0.2643681466579437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979567253856473, "compression_loss": 0.0, "distillation_loss": 0.02114560827612877, "epoch": 5.81, "learning_rate": 3.481069657182076e-06, "loss": 0.0195, "step": 6122, "task_loss": 0.004614364355802536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979593858991337, "compression_loss": 0.0, "distillation_loss": 0.05115702748298645, "epoch": 5.81, "learning_rate": 3.4756479241077667e-06, "loss": 0.0508, "step": 6123, "task_loss": 0.04790136218070984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797962044102144, "compression_loss": 0.0, "distillation_loss": 0.015820186585187912, "epoch": 5.82, "learning_rate": 3.4702301010356762e-06, "loss": 0.0145, "step": 6124, "task_loss": 0.002711120992898941 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979646999956818, "compression_loss": 0.0, "distillation_loss": 0.049572013318538666, "epoch": 5.82, "learning_rate": 3.464816188949982e-06, "loss": 0.0551, "step": 6125, "task_loss": 0.10523169487714767 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979673535807508, "compression_loss": 0.0, "distillation_loss": 0.020622238516807556, "epoch": 5.82, "learning_rate": 3.459406188834141e-06, "loss": 0.0318, "step": 6126, "task_loss": 0.13285362720489502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979700048583546, "compression_loss": 0.0, "distillation_loss": 0.027139317244291306, "epoch": 5.82, "learning_rate": 3.454000101670901e-06, "loss": 0.0251, "step": 6127, "task_loss": 0.006299933418631554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979726538294969, "compression_loss": 0.0, "distillation_loss": 0.008850215002894402, "epoch": 5.82, "learning_rate": 3.4485979284423155e-06, "loss": 0.0129, "step": 6128, "task_loss": 0.04908328130841255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979753004951815, "compression_loss": 0.0, "distillation_loss": 0.031228946521878242, "epoch": 5.82, "learning_rate": 3.443199670129707e-06, "loss": 0.0441, "step": 6129, "task_loss": 0.15944904088974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979779448564119, "compression_loss": 0.0, "distillation_loss": 0.13153165578842163, "epoch": 5.82, "learning_rate": 3.4378053277136946e-06, "loss": 0.1276, "step": 6130, "task_loss": 0.09214229881763458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.797980586914192, "compression_loss": 0.0, "distillation_loss": 0.04689065366983414, "epoch": 5.82, "learning_rate": 3.4324149021741915e-06, "loss": 0.0474, "step": 6131, "task_loss": 0.05218418687582016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979832266695255, "compression_loss": 0.0, "distillation_loss": 0.0955977588891983, "epoch": 5.82, "learning_rate": 3.4270283944903946e-06, "loss": 0.087, "step": 6132, "task_loss": 0.009947611019015312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979858641234158, "compression_loss": 0.0, "distillation_loss": 0.11793653666973114, "epoch": 5.82, "learning_rate": 3.4216458056407775e-06, "loss": 0.1168, "step": 6133, "task_loss": 0.10687369108200073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979884992768668, "compression_loss": 0.0, "distillation_loss": 0.016993030905723572, "epoch": 5.83, "learning_rate": 3.416267136603124e-06, "loss": 0.016, "step": 6134, "task_loss": 0.006749266758561134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979911321308821, "compression_loss": 0.0, "distillation_loss": 0.03799954056739807, "epoch": 5.83, "learning_rate": 3.4108923883545004e-06, "loss": 0.0412, "step": 6135, "task_loss": 0.0700988918542862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979937626864654, "compression_loss": 0.0, "distillation_loss": 0.02764744684100151, "epoch": 5.83, "learning_rate": 3.405521561871247e-06, "loss": 0.0338, "step": 6136, "task_loss": 0.08966173231601715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979963909446204, "compression_loss": 0.0, "distillation_loss": 0.017384584993124008, "epoch": 5.83, "learning_rate": 3.400154658128998e-06, "loss": 0.0161, "step": 6137, "task_loss": 0.004670457914471626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7979990169063508, "compression_loss": 0.0, "distillation_loss": 0.029262276366353035, "epoch": 5.83, "learning_rate": 3.3947916781026877e-06, "loss": 0.0339, "step": 6138, "task_loss": 0.07558421790599823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980016405726602, "compression_loss": 0.0, "distillation_loss": 0.02195669896900654, "epoch": 5.83, "learning_rate": 3.3894326227665196e-06, "loss": 0.0203, "step": 6139, "task_loss": 0.005175255239009857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980042619445524, "compression_loss": 0.0, "distillation_loss": 0.017028218135237694, "epoch": 5.83, "learning_rate": 3.384077493093987e-06, "loss": 0.0157, "step": 6140, "task_loss": 0.003494156524538994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798006881023031, "compression_loss": 0.0, "distillation_loss": 0.01528292428702116, "epoch": 5.83, "learning_rate": 3.378726290057882e-06, "loss": 0.0142, "step": 6141, "task_loss": 0.004559960216283798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980094978090997, "compression_loss": 0.0, "distillation_loss": 0.014390531927347183, "epoch": 5.83, "learning_rate": 3.373379014630279e-06, "loss": 0.02, "step": 6142, "task_loss": 0.07083210349082947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980121123037621, "compression_loss": 0.0, "distillation_loss": 0.0584423765540123, "epoch": 5.83, "learning_rate": 3.368035667782535e-06, "loss": 0.0641, "step": 6143, "task_loss": 0.11510256677865982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798014724508022, "compression_loss": 0.0, "distillation_loss": 0.035305943340063095, "epoch": 5.83, "learning_rate": 3.3626962504852865e-06, "loss": 0.0345, "step": 6144, "task_loss": 0.026764407753944397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980173344228831, "compression_loss": 0.0, "distillation_loss": 0.026942508295178413, "epoch": 5.84, "learning_rate": 3.357360763708478e-06, "loss": 0.0398, "step": 6145, "task_loss": 0.15590748190879822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798019942049349, "compression_loss": 0.0, "distillation_loss": 0.10806174576282501, "epoch": 5.84, "learning_rate": 3.3520292084213145e-06, "loss": 0.1066, "step": 6146, "task_loss": 0.09339408576488495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980225473884235, "compression_loss": 0.0, "distillation_loss": 0.03551897406578064, "epoch": 5.84, "learning_rate": 3.3467015855923016e-06, "loss": 0.0324, "step": 6147, "task_loss": 0.004185806959867477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79802515044111, "compression_loss": 0.0, "distillation_loss": 0.18967923521995544, "epoch": 5.84, "learning_rate": 3.3413778961892324e-06, "loss": 0.1758, "step": 6148, "task_loss": 0.05130823701620102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980277512084125, "compression_loss": 0.0, "distillation_loss": 0.0280881579965353, "epoch": 5.84, "learning_rate": 3.33605814117918e-06, "loss": 0.0319, "step": 6149, "task_loss": 0.06627509742975235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980303496913346, "compression_loss": 0.0, "distillation_loss": 0.014908300712704659, "epoch": 5.84, "learning_rate": 3.3307423215284933e-06, "loss": 0.0137, "step": 6150, "task_loss": 0.002931041643023491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980329458908798, "compression_loss": 0.0, "distillation_loss": 0.04687492176890373, "epoch": 5.84, "learning_rate": 3.325430438202823e-06, "loss": 0.0432, "step": 6151, "task_loss": 0.009853720664978027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798035539808052, "compression_loss": 0.0, "distillation_loss": 0.08521527796983719, "epoch": 5.84, "learning_rate": 3.320122492167108e-06, "loss": 0.0926, "step": 6152, "task_loss": 0.15886865556240082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980381314438548, "compression_loss": 0.0, "distillation_loss": 0.023487182334065437, "epoch": 5.84, "learning_rate": 3.3148184843855513e-06, "loss": 0.0218, "step": 6153, "task_loss": 0.0069718193262815475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980407207992919, "compression_loss": 0.0, "distillation_loss": 0.008888408541679382, "epoch": 5.84, "learning_rate": 3.3095184158216497e-06, "loss": 0.0085, "step": 6154, "task_loss": 0.005421014502644539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980433078753669, "compression_loss": 0.0, "distillation_loss": 0.014762504026293755, "epoch": 5.85, "learning_rate": 3.304222287438194e-06, "loss": 0.0202, "step": 6155, "task_loss": 0.06863877922296524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980458926730837, "compression_loss": 0.0, "distillation_loss": 0.09593481570482254, "epoch": 5.85, "learning_rate": 3.2989301001972505e-06, "loss": 0.1038, "step": 6156, "task_loss": 0.17508172988891602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980484751934457, "compression_loss": 0.0, "distillation_loss": 0.04558049887418747, "epoch": 5.85, "learning_rate": 3.293641855060162e-06, "loss": 0.0424, "step": 6157, "task_loss": 0.013653889298439026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980510554374567, "compression_loss": 0.0, "distillation_loss": 0.021937305107712746, "epoch": 5.85, "learning_rate": 3.2883575529875726e-06, "loss": 0.0212, "step": 6158, "task_loss": 0.01466263085603714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980536334061205, "compression_loss": 0.0, "distillation_loss": 0.02071615308523178, "epoch": 5.85, "learning_rate": 3.2830771949393934e-06, "loss": 0.0194, "step": 6159, "task_loss": 0.007383199408650398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980562091004405, "compression_loss": 0.0, "distillation_loss": 0.02836211957037449, "epoch": 5.85, "learning_rate": 3.27780078187484e-06, "loss": 0.0417, "step": 6160, "task_loss": 0.16145291924476624 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980587825214207, "compression_loss": 0.0, "distillation_loss": 0.06770492345094681, "epoch": 5.85, "learning_rate": 3.272528314752382e-06, "loss": 0.0737, "step": 6161, "task_loss": 0.12756067514419556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980613536700646, "compression_loss": 0.0, "distillation_loss": 0.041868340224027634, "epoch": 5.85, "learning_rate": 3.2672597945298006e-06, "loss": 0.0427, "step": 6162, "task_loss": 0.04990936070680618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980639225473759, "compression_loss": 0.0, "distillation_loss": 0.012833887711167336, "epoch": 5.85, "learning_rate": 3.2619952221641436e-06, "loss": 0.0204, "step": 6163, "task_loss": 0.08840099722146988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980664891543584, "compression_loss": 0.0, "distillation_loss": 0.07975931465625763, "epoch": 5.85, "learning_rate": 3.2567345986117437e-06, "loss": 0.0906, "step": 6164, "task_loss": 0.188090518116951 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980690534920156, "compression_loss": 0.0, "distillation_loss": 0.04811672866344452, "epoch": 5.85, "learning_rate": 3.251477924828222e-06, "loss": 0.0546, "step": 6165, "task_loss": 0.11324407160282135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980716155613513, "compression_loss": 0.0, "distillation_loss": 0.023004433140158653, "epoch": 5.86, "learning_rate": 3.2462252017684797e-06, "loss": 0.0273, "step": 6166, "task_loss": 0.06551149487495422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980741753633692, "compression_loss": 0.0, "distillation_loss": 0.039101071655750275, "epoch": 5.86, "learning_rate": 3.240976430386691e-06, "loss": 0.0382, "step": 6167, "task_loss": 0.030401092022657394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980767328990729, "compression_loss": 0.0, "distillation_loss": 0.03869537636637688, "epoch": 5.86, "learning_rate": 3.2357316116363278e-06, "loss": 0.0375, "step": 6168, "task_loss": 0.026733241975307465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798079288169466, "compression_loss": 0.0, "distillation_loss": 0.016314208507537842, "epoch": 5.86, "learning_rate": 3.2304907464701384e-06, "loss": 0.0184, "step": 6169, "task_loss": 0.037635624408721924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980818411755525, "compression_loss": 0.0, "distillation_loss": 0.15428408980369568, "epoch": 5.86, "learning_rate": 3.225253835840147e-06, "loss": 0.1555, "step": 6170, "task_loss": 0.16609327495098114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980843919183357, "compression_loss": 0.0, "distillation_loss": 0.024836333468556404, "epoch": 5.86, "learning_rate": 3.2200208806976634e-06, "loss": 0.0298, "step": 6171, "task_loss": 0.07457348704338074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980869403988196, "compression_loss": 0.0, "distillation_loss": 0.025460699573159218, "epoch": 5.86, "learning_rate": 3.2147918819932814e-06, "loss": 0.0235, "step": 6172, "task_loss": 0.00573544017970562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980894866180076, "compression_loss": 0.0, "distillation_loss": 0.020409489050507545, "epoch": 5.86, "learning_rate": 3.209566840676875e-06, "loss": 0.0189, "step": 6173, "task_loss": 0.00523163378238678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980920305769037, "compression_loss": 0.0, "distillation_loss": 0.04639172554016113, "epoch": 5.86, "learning_rate": 3.204345757697591e-06, "loss": 0.0423, "step": 6174, "task_loss": 0.005238974466919899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980945722765113, "compression_loss": 0.0, "distillation_loss": 0.07041463255882263, "epoch": 5.86, "learning_rate": 3.199128634003873e-06, "loss": 0.0673, "step": 6175, "task_loss": 0.03931209072470665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980971117178342, "compression_loss": 0.0, "distillation_loss": 0.10844946652650833, "epoch": 5.87, "learning_rate": 3.1939154705434267e-06, "loss": 0.1306, "step": 6176, "task_loss": 0.32991883158683777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7980996489018761, "compression_loss": 0.0, "distillation_loss": 0.0225403793156147, "epoch": 5.87, "learning_rate": 3.188706268263261e-06, "loss": 0.0212, "step": 6177, "task_loss": 0.009059395641088486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981021838296406, "compression_loss": 0.0, "distillation_loss": 0.0714748352766037, "epoch": 5.87, "learning_rate": 3.183501028109642e-06, "loss": 0.0773, "step": 6178, "task_loss": 0.1296701580286026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981047165021314, "compression_loss": 0.0, "distillation_loss": 0.043413858860731125, "epoch": 5.87, "learning_rate": 3.1782997510281352e-06, "loss": 0.0535, "step": 6179, "task_loss": 0.14417198300361633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981072469203524, "compression_loss": 0.0, "distillation_loss": 0.044636182487010956, "epoch": 5.87, "learning_rate": 3.173102437963571e-06, "loss": 0.0413, "step": 6180, "task_loss": 0.011053688824176788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798109775085307, "compression_loss": 0.0, "distillation_loss": 0.04771566390991211, "epoch": 5.87, "learning_rate": 3.1679090898600663e-06, "loss": 0.0465, "step": 6181, "task_loss": 0.035778842866420746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798112300997999, "compression_loss": 0.0, "distillation_loss": 0.14893057942390442, "epoch": 5.87, "learning_rate": 3.1627197076610243e-06, "loss": 0.149, "step": 6182, "task_loss": 0.14932629466056824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798114824659432, "compression_loss": 0.0, "distillation_loss": 0.04840100184082985, "epoch": 5.87, "learning_rate": 3.157534292309114e-06, "loss": 0.0514, "step": 6183, "task_loss": 0.07838822156190872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981173460706098, "compression_loss": 0.0, "distillation_loss": 0.04644680395722389, "epoch": 5.87, "learning_rate": 3.1523528447462925e-06, "loss": 0.0423, "step": 6184, "task_loss": 0.004648592323064804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798119865232536, "compression_loss": 0.0, "distillation_loss": 0.12863226234912872, "epoch": 5.87, "learning_rate": 3.147175365913793e-06, "loss": 0.1241, "step": 6185, "task_loss": 0.08361361920833588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981223821462143, "compression_loss": 0.0, "distillation_loss": 0.10540250688791275, "epoch": 5.87, "learning_rate": 3.1420018567521412e-06, "loss": 0.1011, "step": 6186, "task_loss": 0.06201855093240738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981248968126484, "compression_loss": 0.0, "distillation_loss": 0.027073774486780167, "epoch": 5.88, "learning_rate": 3.136832318201119e-06, "loss": 0.0254, "step": 6187, "task_loss": 0.010364262387156487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981274092328421, "compression_loss": 0.0, "distillation_loss": 0.09114031493663788, "epoch": 5.88, "learning_rate": 3.1316667511997967e-06, "loss": 0.0891, "step": 6188, "task_loss": 0.07045449316501617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981299194077989, "compression_loss": 0.0, "distillation_loss": 0.017900224775075912, "epoch": 5.88, "learning_rate": 3.126505156686532e-06, "loss": 0.0242, "step": 6189, "task_loss": 0.08130859583616257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981324273385224, "compression_loss": 0.0, "distillation_loss": 0.011481826193630695, "epoch": 5.88, "learning_rate": 3.121347535598948e-06, "loss": 0.0107, "step": 6190, "task_loss": 0.003604454919695854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981349330260166, "compression_loss": 0.0, "distillation_loss": 0.02443830482661724, "epoch": 5.88, "learning_rate": 3.11619388887395e-06, "loss": 0.0247, "step": 6191, "task_loss": 0.026645643636584282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981374364712849, "compression_loss": 0.0, "distillation_loss": 0.049847427755594254, "epoch": 5.88, "learning_rate": 3.111044217447731e-06, "loss": 0.0453, "step": 6192, "task_loss": 0.004831980913877487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981399376753311, "compression_loss": 0.0, "distillation_loss": 0.027359748259186745, "epoch": 5.88, "learning_rate": 3.105898522255743e-06, "loss": 0.0262, "step": 6193, "task_loss": 0.015636542811989784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981424366391588, "compression_loss": 0.0, "distillation_loss": 0.02219046652317047, "epoch": 5.88, "learning_rate": 3.100756804232735e-06, "loss": 0.0209, "step": 6194, "task_loss": 0.008930448442697525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981449333637719, "compression_loss": 0.0, "distillation_loss": 0.01434510201215744, "epoch": 5.88, "learning_rate": 3.095619064312719e-06, "loss": 0.0132, "step": 6195, "task_loss": 0.0025472547858953476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981474278501739, "compression_loss": 0.0, "distillation_loss": 0.014208871871232986, "epoch": 5.88, "learning_rate": 3.0904853034289943e-06, "loss": 0.0166, "step": 6196, "task_loss": 0.03827598690986633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981499200993685, "compression_loss": 0.0, "distillation_loss": 0.028200728818774223, "epoch": 5.89, "learning_rate": 3.085355522514136e-06, "loss": 0.0336, "step": 6197, "task_loss": 0.08197523653507233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981524101123594, "compression_loss": 0.0, "distillation_loss": 0.04185293987393379, "epoch": 5.89, "learning_rate": 3.0802297224999805e-06, "loss": 0.0391, "step": 6198, "task_loss": 0.014529986307024956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981548978901502, "compression_loss": 0.0, "distillation_loss": 0.02289126068353653, "epoch": 5.89, "learning_rate": 3.0751079043176673e-06, "loss": 0.0286, "step": 6199, "task_loss": 0.08047693967819214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981573834337446, "compression_loss": 0.0, "distillation_loss": 0.014290563762187958, "epoch": 5.89, "learning_rate": 3.0699900688975964e-06, "loss": 0.0134, "step": 6200, "task_loss": 0.005466701462864876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981598667441465, "compression_loss": 0.0, "distillation_loss": 0.11609383672475815, "epoch": 5.89, "learning_rate": 3.064876217169446e-06, "loss": 0.1114, "step": 6201, "task_loss": 0.06948763132095337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981623478223594, "compression_loss": 0.0, "distillation_loss": 0.05835815891623497, "epoch": 5.89, "learning_rate": 3.05976635006216e-06, "loss": 0.0661, "step": 6202, "task_loss": 0.13556569814682007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798164826669387, "compression_loss": 0.0, "distillation_loss": 0.13488264381885529, "epoch": 5.89, "learning_rate": 3.0546604685039947e-06, "loss": 0.1469, "step": 6203, "task_loss": 0.25527501106262207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798167303286233, "compression_loss": 0.0, "distillation_loss": 0.052829310297966, "epoch": 5.89, "learning_rate": 3.0495585734224424e-06, "loss": 0.059, "step": 6204, "task_loss": 0.1148587167263031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981697776739011, "compression_loss": 0.0, "distillation_loss": 0.01467627938836813, "epoch": 5.89, "learning_rate": 3.044460665744284e-06, "loss": 0.0242, "step": 6205, "task_loss": 0.11007630825042725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981722498333949, "compression_loss": 0.0, "distillation_loss": 0.017747841775417328, "epoch": 5.89, "learning_rate": 3.0393667463955914e-06, "loss": 0.0164, "step": 6206, "task_loss": 0.004152687266469002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981747197657181, "compression_loss": 0.0, "distillation_loss": 0.043606314808130264, "epoch": 5.89, "learning_rate": 3.034276816301693e-06, "loss": 0.0501, "step": 6207, "task_loss": 0.10881586372852325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981771874718745, "compression_loss": 0.0, "distillation_loss": 0.03161245957016945, "epoch": 5.9, "learning_rate": 3.0291908763872e-06, "loss": 0.0308, "step": 6208, "task_loss": 0.023898892104625702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981796529528676, "compression_loss": 0.0, "distillation_loss": 0.1580495983362198, "epoch": 5.9, "learning_rate": 3.0241089275759886e-06, "loss": 0.1547, "step": 6209, "task_loss": 0.12449827790260315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981821162097013, "compression_loss": 0.0, "distillation_loss": 0.017794454470276833, "epoch": 5.9, "learning_rate": 3.019030970791237e-06, "loss": 0.0165, "step": 6210, "task_loss": 0.004612291231751442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981845772433791, "compression_loss": 0.0, "distillation_loss": 0.06614623963832855, "epoch": 5.9, "learning_rate": 3.013957006955362e-06, "loss": 0.0748, "step": 6211, "task_loss": 0.1525183916091919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981870360549047, "compression_loss": 0.0, "distillation_loss": 0.10029789060354233, "epoch": 5.9, "learning_rate": 3.008887036990085e-06, "loss": 0.0955, "step": 6212, "task_loss": 0.05255453288555145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981894926452819, "compression_loss": 0.0, "distillation_loss": 0.08173903077840805, "epoch": 5.9, "learning_rate": 3.003821061816392e-06, "loss": 0.0937, "step": 6213, "task_loss": 0.20181068778038025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981919470155143, "compression_loss": 0.0, "distillation_loss": 0.02389482408761978, "epoch": 5.9, "learning_rate": 2.998759082354538e-06, "loss": 0.0471, "step": 6214, "task_loss": 0.25638294219970703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981943991666055, "compression_loss": 0.0, "distillation_loss": 0.060066595673561096, "epoch": 5.9, "learning_rate": 2.9937010995240543e-06, "loss": 0.0666, "step": 6215, "task_loss": 0.12505532801151276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981968490995595, "compression_loss": 0.0, "distillation_loss": 0.03829333186149597, "epoch": 5.9, "learning_rate": 2.988647114243748e-06, "loss": 0.0417, "step": 6216, "task_loss": 0.0723431333899498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7981992968153795, "compression_loss": 0.0, "distillation_loss": 0.06221352517604828, "epoch": 5.9, "learning_rate": 2.983597127431703e-06, "loss": 0.0646, "step": 6217, "task_loss": 0.08588936179876328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982017423150696, "compression_loss": 0.0, "distillation_loss": 0.0338950976729393, "epoch": 5.91, "learning_rate": 2.9785511400052727e-06, "loss": 0.0451, "step": 6218, "task_loss": 0.14563745260238647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982041855996332, "compression_loss": 0.0, "distillation_loss": 0.10232432931661606, "epoch": 5.91, "learning_rate": 2.973509152881079e-06, "loss": 0.1044, "step": 6219, "task_loss": 0.12259991466999054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982066266700742, "compression_loss": 0.0, "distillation_loss": 0.043488338589668274, "epoch": 5.91, "learning_rate": 2.9684711669750313e-06, "loss": 0.0447, "step": 6220, "task_loss": 0.05578209087252617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982090655273961, "compression_loss": 0.0, "distillation_loss": 0.02594229206442833, "epoch": 5.91, "learning_rate": 2.963437183202303e-06, "loss": 0.0315, "step": 6221, "task_loss": 0.08114132285118103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982115021726028, "compression_loss": 0.0, "distillation_loss": 0.03786736726760864, "epoch": 5.91, "learning_rate": 2.9584072024773395e-06, "loss": 0.0364, "step": 6222, "task_loss": 0.022759338840842247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982139366066978, "compression_loss": 0.0, "distillation_loss": 0.01899906061589718, "epoch": 5.91, "learning_rate": 2.953381225713858e-06, "loss": 0.0281, "step": 6223, "task_loss": 0.110114686191082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982163688306848, "compression_loss": 0.0, "distillation_loss": 0.07625134289264679, "epoch": 5.91, "learning_rate": 2.948359253824859e-06, "loss": 0.0865, "step": 6224, "task_loss": 0.1790730506181717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982187988455675, "compression_loss": 0.0, "distillation_loss": 0.04318426176905632, "epoch": 5.91, "learning_rate": 2.9433412877226036e-06, "loss": 0.0521, "step": 6225, "task_loss": 0.1328027844429016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982212266523496, "compression_loss": 0.0, "distillation_loss": 0.11644196510314941, "epoch": 5.91, "learning_rate": 2.938327328318624e-06, "loss": 0.1194, "step": 6226, "task_loss": 0.14612026512622833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982236522520348, "compression_loss": 0.0, "distillation_loss": 0.04620961844921112, "epoch": 5.91, "learning_rate": 2.9333173765237394e-06, "loss": 0.0492, "step": 6227, "task_loss": 0.07610806822776794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982260756456268, "compression_loss": 0.0, "distillation_loss": 0.02641240693628788, "epoch": 5.91, "learning_rate": 2.928311433248024e-06, "loss": 0.025, "step": 6228, "task_loss": 0.012464674189686775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982284968341291, "compression_loss": 0.0, "distillation_loss": 0.04388430714607239, "epoch": 5.92, "learning_rate": 2.923309499400839e-06, "loss": 0.0429, "step": 6229, "task_loss": 0.03393597900867462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982309158185457, "compression_loss": 0.0, "distillation_loss": 0.0793541669845581, "epoch": 5.92, "learning_rate": 2.918311575890803e-06, "loss": 0.0829, "step": 6230, "task_loss": 0.11435024440288544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982333325998799, "compression_loss": 0.0, "distillation_loss": 0.0714527815580368, "epoch": 5.92, "learning_rate": 2.9133176636258196e-06, "loss": 0.0659, "step": 6231, "task_loss": 0.015902981162071228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982357471791358, "compression_loss": 0.0, "distillation_loss": 0.07313866913318634, "epoch": 5.92, "learning_rate": 2.9083277635130523e-06, "loss": 0.0717, "step": 6232, "task_loss": 0.05869182199239731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982381595573168, "compression_loss": 0.0, "distillation_loss": 0.010230525396764278, "epoch": 5.92, "learning_rate": 2.903341876458937e-06, "loss": 0.0096, "step": 6233, "task_loss": 0.004364604130387306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982405697354266, "compression_loss": 0.0, "distillation_loss": 0.040943779051303864, "epoch": 5.92, "learning_rate": 2.8983600033691936e-06, "loss": 0.046, "step": 6234, "task_loss": 0.09106667339801788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798242977714469, "compression_loss": 0.0, "distillation_loss": 0.08053984493017197, "epoch": 5.92, "learning_rate": 2.8933821451487987e-06, "loss": 0.0847, "step": 6235, "task_loss": 0.12223599851131439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982453834954475, "compression_loss": 0.0, "distillation_loss": 0.018065018579363823, "epoch": 5.92, "learning_rate": 2.8884083027019976e-06, "loss": 0.0273, "step": 6236, "task_loss": 0.11061768233776093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982477870793661, "compression_loss": 0.0, "distillation_loss": 0.049103301018476486, "epoch": 5.92, "learning_rate": 2.8834384769323207e-06, "loss": 0.0446, "step": 6237, "task_loss": 0.0044719260185956955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982501884672281, "compression_loss": 0.0, "distillation_loss": 0.0168076790869236, "epoch": 5.92, "learning_rate": 2.878472668742563e-06, "loss": 0.0157, "step": 6238, "task_loss": 0.005568409338593483 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982525876600374, "compression_loss": 0.0, "distillation_loss": 0.14126557111740112, "epoch": 5.92, "learning_rate": 2.8735108790347824e-06, "loss": 0.1368, "step": 6239, "task_loss": 0.0968267098069191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982549846587976, "compression_loss": 0.0, "distillation_loss": 0.028058458119630814, "epoch": 5.93, "learning_rate": 2.8685531087103092e-06, "loss": 0.0302, "step": 6240, "task_loss": 0.049860235303640366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982573794645126, "compression_loss": 0.0, "distillation_loss": 0.028030022978782654, "epoch": 5.93, "learning_rate": 2.8635993586697553e-06, "loss": 0.0262, "step": 6241, "task_loss": 0.010059693828225136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982597720781858, "compression_loss": 0.0, "distillation_loss": 0.10418315976858139, "epoch": 5.93, "learning_rate": 2.8586496298129873e-06, "loss": 0.1219, "step": 6242, "task_loss": 0.28089088201522827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982621625008209, "compression_loss": 0.0, "distillation_loss": 0.01780703291296959, "epoch": 5.93, "learning_rate": 2.8537039230391416e-06, "loss": 0.0163, "step": 6243, "task_loss": 0.003232475370168686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982645507334218, "compression_loss": 0.0, "distillation_loss": 0.07454448938369751, "epoch": 5.93, "learning_rate": 2.8487622392466438e-06, "loss": 0.0806, "step": 6244, "task_loss": 0.13465920090675354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798266936776992, "compression_loss": 0.0, "distillation_loss": 0.025488190352916718, "epoch": 5.93, "learning_rate": 2.8438245793331598e-06, "loss": 0.0241, "step": 6245, "task_loss": 0.011609837412834167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982693206325353, "compression_loss": 0.0, "distillation_loss": 0.11189248412847519, "epoch": 5.93, "learning_rate": 2.8388909441956517e-06, "loss": 0.1093, "step": 6246, "task_loss": 0.08592940866947174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982717023010553, "compression_loss": 0.0, "distillation_loss": 0.01671120896935463, "epoch": 5.93, "learning_rate": 2.8339613347303283e-06, "loss": 0.0154, "step": 6247, "task_loss": 0.003899950534105301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982740817835556, "compression_loss": 0.0, "distillation_loss": 0.03453841432929039, "epoch": 5.93, "learning_rate": 2.8290357518326843e-06, "loss": 0.0494, "step": 6248, "task_loss": 0.18323814868927002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982764590810402, "compression_loss": 0.0, "distillation_loss": 0.04547137767076492, "epoch": 5.93, "learning_rate": 2.824114196397476e-06, "loss": 0.0428, "step": 6249, "task_loss": 0.01905733346939087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982788341945124, "compression_loss": 0.0, "distillation_loss": 0.02116478979587555, "epoch": 5.94, "learning_rate": 2.819196669318716e-06, "loss": 0.0196, "step": 6250, "task_loss": 0.0053593870252370834 }, { "epoch": 5.94, "eval_accuracy": 0.8841743119266054, "eval_loss": 0.43586060404777527, "eval_runtime": 17.6466, "eval_samples_per_second": 49.415, "eval_steps_per_second": 6.177, "step": 6250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982812071249761, "compression_loss": 0.0, "distillation_loss": 0.013482020236551762, "epoch": 5.94, "learning_rate": 2.814283171489712e-06, "loss": 0.0127, "step": 6251, "task_loss": 0.0053385235369205475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982835778734348, "compression_loss": 0.0, "distillation_loss": 0.08214183896780014, "epoch": 5.94, "learning_rate": 2.809373703803017e-06, "loss": 0.0771, "step": 6252, "task_loss": 0.0316874198615551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982859464408925, "compression_loss": 0.0, "distillation_loss": 0.03672255948185921, "epoch": 5.94, "learning_rate": 2.8044682671504536e-06, "loss": 0.0444, "step": 6253, "task_loss": 0.11331107467412949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982883128283526, "compression_loss": 0.0, "distillation_loss": 0.08191946148872375, "epoch": 5.94, "learning_rate": 2.7995668624231253e-06, "loss": 0.0842, "step": 6254, "task_loss": 0.10507936030626297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982906770368189, "compression_loss": 0.0, "distillation_loss": 0.04623375087976456, "epoch": 5.94, "learning_rate": 2.7946694905114006e-06, "loss": 0.0583, "step": 6255, "task_loss": 0.16685155034065247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798293039067295, "compression_loss": 0.0, "distillation_loss": 0.012130487710237503, "epoch": 5.94, "learning_rate": 2.789776152304904e-06, "loss": 0.0189, "step": 6256, "task_loss": 0.0798635482788086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982953989207847, "compression_loss": 0.0, "distillation_loss": 0.02477359026670456, "epoch": 5.94, "learning_rate": 2.7848868486925307e-06, "loss": 0.0283, "step": 6257, "task_loss": 0.0600624680519104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7982977565982916, "compression_loss": 0.0, "distillation_loss": 0.022501779720187187, "epoch": 5.94, "learning_rate": 2.7800015805624526e-06, "loss": 0.0403, "step": 6258, "task_loss": 0.20015643537044525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983001121008194, "compression_loss": 0.0, "distillation_loss": 0.011296769604086876, "epoch": 5.94, "learning_rate": 2.7751203488021042e-06, "loss": 0.019, "step": 6259, "task_loss": 0.08792918175458908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983024654293718, "compression_loss": 0.0, "distillation_loss": 0.030689077451825142, "epoch": 5.94, "learning_rate": 2.7702431542981715e-06, "loss": 0.038, "step": 6260, "task_loss": 0.10409321635961533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983048165849524, "compression_loss": 0.0, "distillation_loss": 0.07838670909404755, "epoch": 5.95, "learning_rate": 2.765369997936637e-06, "loss": 0.0724, "step": 6261, "task_loss": 0.018565570935606956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983071655685651, "compression_loss": 0.0, "distillation_loss": 0.02527971751987934, "epoch": 5.95, "learning_rate": 2.7605008806027206e-06, "loss": 0.0422, "step": 6262, "task_loss": 0.19478943943977356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983095123812134, "compression_loss": 0.0, "distillation_loss": 0.013227314688265324, "epoch": 5.95, "learning_rate": 2.7556358031809308e-06, "loss": 0.0121, "step": 6263, "task_loss": 0.002236088737845421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798311857023901, "compression_loss": 0.0, "distillation_loss": 0.013676393777132034, "epoch": 5.95, "learning_rate": 2.7507747665550227e-06, "loss": 0.014, "step": 6264, "task_loss": 0.01731892116367817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983141994976315, "compression_loss": 0.0, "distillation_loss": 0.05857694894075394, "epoch": 5.95, "learning_rate": 2.7459177716080363e-06, "loss": 0.0554, "step": 6265, "task_loss": 0.02669447846710682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983165398034088, "compression_loss": 0.0, "distillation_loss": 0.041833531111478806, "epoch": 5.95, "learning_rate": 2.741064819222264e-06, "loss": 0.0408, "step": 6266, "task_loss": 0.03170401230454445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983188779422363, "compression_loss": 0.0, "distillation_loss": 0.024519365280866623, "epoch": 5.95, "learning_rate": 2.7362159102792605e-06, "loss": 0.0226, "step": 6267, "task_loss": 0.005678284913301468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798321213915118, "compression_loss": 0.0, "distillation_loss": 0.017905663698911667, "epoch": 5.95, "learning_rate": 2.7313710456598667e-06, "loss": 0.0166, "step": 6268, "task_loss": 0.004889186471700668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983235477230574, "compression_loss": 0.0, "distillation_loss": 0.09741759300231934, "epoch": 5.95, "learning_rate": 2.72653022624417e-06, "loss": 0.0961, "step": 6269, "task_loss": 0.08380945026874542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983258793670581, "compression_loss": 0.0, "distillation_loss": 0.028389321640133858, "epoch": 5.95, "learning_rate": 2.7216934529115223e-06, "loss": 0.0266, "step": 6270, "task_loss": 0.010113891214132309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798328208848124, "compression_loss": 0.0, "distillation_loss": 0.012037597596645355, "epoch": 5.96, "learning_rate": 2.7168607265405517e-06, "loss": 0.0111, "step": 6271, "task_loss": 0.002512754872441292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983305361672587, "compression_loss": 0.0, "distillation_loss": 0.016703153029084206, "epoch": 5.96, "learning_rate": 2.712032048009153e-06, "loss": 0.0232, "step": 6272, "task_loss": 0.08192671090364456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983328613254658, "compression_loss": 0.0, "distillation_loss": 0.06296996772289276, "epoch": 5.96, "learning_rate": 2.707207418194474e-06, "loss": 0.0594, "step": 6273, "task_loss": 0.026859570294618607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798335184323749, "compression_loss": 0.0, "distillation_loss": 0.0931231677532196, "epoch": 5.96, "learning_rate": 2.702386837972923e-06, "loss": 0.095, "step": 6274, "task_loss": 0.11223654448986053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798337505163112, "compression_loss": 0.0, "distillation_loss": 0.08410904556512833, "epoch": 5.96, "learning_rate": 2.697570308220196e-06, "loss": 0.0936, "step": 6275, "task_loss": 0.17943312227725983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983398238445586, "compression_loss": 0.0, "distillation_loss": 0.021530931815505028, "epoch": 5.96, "learning_rate": 2.6927578298112324e-06, "loss": 0.022, "step": 6276, "task_loss": 0.025872595608234406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983421403690923, "compression_loss": 0.0, "distillation_loss": 0.1349969059228897, "epoch": 5.96, "learning_rate": 2.687949403620235e-06, "loss": 0.1343, "step": 6277, "task_loss": 0.1277485191822052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983444547377169, "compression_loss": 0.0, "distillation_loss": 0.047185495495796204, "epoch": 5.96, "learning_rate": 2.6831450305206896e-06, "loss": 0.044, "step": 6278, "task_loss": 0.015110282227396965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798346766951436, "compression_loss": 0.0, "distillation_loss": 0.028054438531398773, "epoch": 5.96, "learning_rate": 2.6783447113853305e-06, "loss": 0.026, "step": 6279, "task_loss": 0.00766693614423275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983490770112535, "compression_loss": 0.0, "distillation_loss": 0.01661674678325653, "epoch": 5.96, "learning_rate": 2.673548447086152e-06, "loss": 0.0274, "step": 6280, "task_loss": 0.12479189783334732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983513849181727, "compression_loss": 0.0, "distillation_loss": 0.012853579595685005, "epoch": 5.96, "learning_rate": 2.6687562384944216e-06, "loss": 0.0221, "step": 6281, "task_loss": 0.10521620512008667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983536906731976, "compression_loss": 0.0, "distillation_loss": 0.032144561409950256, "epoch": 5.97, "learning_rate": 2.6639680864806754e-06, "loss": 0.0308, "step": 6282, "task_loss": 0.01860850676894188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983559942773317, "compression_loss": 0.0, "distillation_loss": 0.023536494001746178, "epoch": 5.97, "learning_rate": 2.659183991914696e-06, "loss": 0.027, "step": 6283, "task_loss": 0.05826903134584427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983582957315788, "compression_loss": 0.0, "distillation_loss": 0.04953496530652046, "epoch": 5.97, "learning_rate": 2.6544039556655354e-06, "loss": 0.054, "step": 6284, "task_loss": 0.09401563555002213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983605950369426, "compression_loss": 0.0, "distillation_loss": 0.011628339067101479, "epoch": 5.97, "learning_rate": 2.6496279786015184e-06, "loss": 0.0187, "step": 6285, "task_loss": 0.08250655233860016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983628921944266, "compression_loss": 0.0, "distillation_loss": 0.03543800860643387, "epoch": 5.97, "learning_rate": 2.6448560615902202e-06, "loss": 0.0324, "step": 6286, "task_loss": 0.0052569154649972916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983651872050347, "compression_loss": 0.0, "distillation_loss": 0.015366720035672188, "epoch": 5.97, "learning_rate": 2.640088205498481e-06, "loss": 0.0165, "step": 6287, "task_loss": 0.0262142401188612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983674800697704, "compression_loss": 0.0, "distillation_loss": 0.04069266468286514, "epoch": 5.97, "learning_rate": 2.635324411192397e-06, "loss": 0.042, "step": 6288, "task_loss": 0.054010894149541855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983697707896376, "compression_loss": 0.0, "distillation_loss": 0.08131635189056396, "epoch": 5.97, "learning_rate": 2.6305646795373507e-06, "loss": 0.0923, "step": 6289, "task_loss": 0.19130820035934448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983720593656397, "compression_loss": 0.0, "distillation_loss": 0.022895563393831253, "epoch": 5.97, "learning_rate": 2.6258090113979646e-06, "loss": 0.0363, "step": 6290, "task_loss": 0.15735076367855072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983743457987806, "compression_loss": 0.0, "distillation_loss": 0.018831467255949974, "epoch": 5.97, "learning_rate": 2.6210574076381224e-06, "loss": 0.0432, "step": 6291, "task_loss": 0.26204773783683777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798376630090064, "compression_loss": 0.0, "distillation_loss": 0.0476510114967823, "epoch": 5.98, "learning_rate": 2.616309869120984e-06, "loss": 0.0463, "step": 6292, "task_loss": 0.033927060663700104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983789122404933, "compression_loss": 0.0, "distillation_loss": 0.04823393374681473, "epoch": 5.98, "learning_rate": 2.6115663967089587e-06, "loss": 0.0457, "step": 6293, "task_loss": 0.022519726306200027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983811922510725, "compression_loss": 0.0, "distillation_loss": 0.02489767223596573, "epoch": 5.98, "learning_rate": 2.6068269912637224e-06, "loss": 0.0393, "step": 6294, "task_loss": 0.16874471306800842 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983834701228052, "compression_loss": 0.0, "distillation_loss": 0.01917928084731102, "epoch": 5.98, "learning_rate": 2.602091653646205e-06, "loss": 0.0256, "step": 6295, "task_loss": 0.08345439285039902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798385745856695, "compression_loss": 0.0, "distillation_loss": 0.011729862540960312, "epoch": 5.98, "learning_rate": 2.597360384716613e-06, "loss": 0.0111, "step": 6296, "task_loss": 0.00561935268342495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983880194537456, "compression_loss": 0.0, "distillation_loss": 0.08482478559017181, "epoch": 5.98, "learning_rate": 2.592633185334395e-06, "loss": 0.0894, "step": 6297, "task_loss": 0.1304791271686554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983902909149607, "compression_loss": 0.0, "distillation_loss": 0.0635722205042839, "epoch": 5.98, "learning_rate": 2.5879100563582724e-06, "loss": 0.0594, "step": 6298, "task_loss": 0.022111881524324417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798392560241344, "compression_loss": 0.0, "distillation_loss": 0.023897292092442513, "epoch": 5.98, "learning_rate": 2.5831909986462318e-06, "loss": 0.0219, "step": 6299, "task_loss": 0.004307908937335014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983948274338992, "compression_loss": 0.0, "distillation_loss": 0.024204891175031662, "epoch": 5.98, "learning_rate": 2.5784760130555057e-06, "loss": 0.0237, "step": 6300, "task_loss": 0.018668916076421738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983970924936299, "compression_loss": 0.0, "distillation_loss": 0.018057633191347122, "epoch": 5.98, "learning_rate": 2.573765100442596e-06, "loss": 0.0167, "step": 6301, "task_loss": 0.004313705489039421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7983993554215398, "compression_loss": 0.0, "distillation_loss": 0.05510625243186951, "epoch": 5.98, "learning_rate": 2.569058261663257e-06, "loss": 0.0541, "step": 6302, "task_loss": 0.04473312571644783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984016162186327, "compression_loss": 0.0, "distillation_loss": 0.016285603865981102, "epoch": 5.99, "learning_rate": 2.5643554975725182e-06, "loss": 0.0216, "step": 6303, "task_loss": 0.0692404955625534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984038748859122, "compression_loss": 0.0, "distillation_loss": 0.021900523453950882, "epoch": 5.99, "learning_rate": 2.5596568090246548e-06, "loss": 0.0205, "step": 6304, "task_loss": 0.007533866912126541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798406131424382, "compression_loss": 0.0, "distillation_loss": 0.0129203200340271, "epoch": 5.99, "learning_rate": 2.5549621968732005e-06, "loss": 0.0236, "step": 6305, "task_loss": 0.12000947445631027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984083858350457, "compression_loss": 0.0, "distillation_loss": 0.02725234068930149, "epoch": 5.99, "learning_rate": 2.5502716619709598e-06, "loss": 0.0256, "step": 6306, "task_loss": 0.01108565554022789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984106381189071, "compression_loss": 0.0, "distillation_loss": 0.03494996577501297, "epoch": 5.99, "learning_rate": 2.545585205169995e-06, "loss": 0.0337, "step": 6307, "task_loss": 0.022902732715010643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984128882769698, "compression_loss": 0.0, "distillation_loss": 0.026792103424668312, "epoch": 5.99, "learning_rate": 2.54090282732162e-06, "loss": 0.0306, "step": 6308, "task_loss": 0.06438016891479492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984151363102374, "compression_loss": 0.0, "distillation_loss": 0.015818610787391663, "epoch": 5.99, "learning_rate": 2.536224529276404e-06, "loss": 0.0239, "step": 6309, "task_loss": 0.09637781977653503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984173822197138, "compression_loss": 0.0, "distillation_loss": 0.015181312337517738, "epoch": 5.99, "learning_rate": 2.5315503118841956e-06, "loss": 0.0272, "step": 6310, "task_loss": 0.1348828375339508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984196260064026, "compression_loss": 0.0, "distillation_loss": 0.021186668425798416, "epoch": 5.99, "learning_rate": 2.5268801759940813e-06, "loss": 0.02, "step": 6311, "task_loss": 0.009767893701791763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984218676713074, "compression_loss": 0.0, "distillation_loss": 0.058261334896087646, "epoch": 5.99, "learning_rate": 2.522214122454411e-06, "loss": 0.0564, "step": 6312, "task_loss": 0.04001392796635628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.798424107215432, "compression_loss": 0.0, "distillation_loss": 0.02801283821463585, "epoch": 6.0, "learning_rate": 2.5175521521128035e-06, "loss": 0.0276, "step": 6313, "task_loss": 0.023807687684893608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.79842634463978, "compression_loss": 0.0, "distillation_loss": 0.166382297873497, "epoch": 6.0, "learning_rate": 2.512894265816121e-06, "loss": 0.165, "step": 6314, "task_loss": 0.15275904536247253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984285799453551, "compression_loss": 0.0, "distillation_loss": 0.06544499844312668, "epoch": 6.0, "learning_rate": 2.5082404644104995e-06, "loss": 0.0815, "step": 6315, "task_loss": 0.22553624212741852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984308131331609, "compression_loss": 0.0, "distillation_loss": 0.05184311419725418, "epoch": 6.0, "learning_rate": 2.503590748741311e-06, "loss": 0.0555, "step": 6316, "task_loss": 0.0887538492679596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984330442042012, "compression_loss": 0.0, "distillation_loss": 0.023621458560228348, "epoch": 6.0, "learning_rate": 2.498945119653215e-06, "loss": 0.0217, "step": 6317, "task_loss": 0.00461687333881855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.620820711904918, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7947265660077046, "compression/magnitude_sparsity/target_sparsity_level": 0.7984352731594797, "compression_loss": 0.0, "distillation_loss": 0.03740524873137474, "epoch": 6.0, "learning_rate": 2.4943035779901008e-06, "loss": 0.0582, "step": 6318, "task_loss": 0.24517786502838135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984375, "compression_loss": 0.0, "distillation_loss": 0.23347151279449463, "epoch": 6.0, "learning_rate": 2.4896661245951275e-06, "loss": 0.2215, "step": 6319, "task_loss": 0.11372692137956619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984397247267658, "compression_loss": 0.0, "distillation_loss": 0.3618473410606384, "epoch": 6.0, "learning_rate": 2.485032760310718e-06, "loss": 0.3399, "step": 6320, "task_loss": 0.14205172657966614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984419473407809, "compression_loss": 0.0, "distillation_loss": 0.2971758544445038, "epoch": 6.0, "learning_rate": 2.4804034859785414e-06, "loss": 0.2839, "step": 6321, "task_loss": 0.16441358625888824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984441678430487, "compression_loss": 0.0, "distillation_loss": 0.21649004518985748, "epoch": 6.0, "learning_rate": 2.475778302439524e-06, "loss": 0.2025, "step": 6322, "task_loss": 0.07684586942195892 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984463862345732, "compression_loss": 0.0, "distillation_loss": 0.23935849964618683, "epoch": 6.0, "learning_rate": 2.4711572105338533e-06, "loss": 0.2226, "step": 6323, "task_loss": 0.07181292772293091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984486025163579, "compression_loss": 0.0, "distillation_loss": 0.24455556273460388, "epoch": 6.01, "learning_rate": 2.4665402111009814e-06, "loss": 0.2304, "step": 6324, "task_loss": 0.10250243544578552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984508166894065, "compression_loss": 0.0, "distillation_loss": 0.3571939468383789, "epoch": 6.01, "learning_rate": 2.4619273049796e-06, "loss": 0.3339, "step": 6325, "task_loss": 0.12418486177921295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984530287547227, "compression_loss": 0.0, "distillation_loss": 0.22672238945960999, "epoch": 6.01, "learning_rate": 2.4573184930076655e-06, "loss": 0.2185, "step": 6326, "task_loss": 0.14480708539485931 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984552387133103, "compression_loss": 0.0, "distillation_loss": 0.2425723820924759, "epoch": 6.01, "learning_rate": 2.4527137760224e-06, "loss": 0.2238, "step": 6327, "task_loss": 0.05520417168736458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984574465661727, "compression_loss": 0.0, "distillation_loss": 0.2952827215194702, "epoch": 6.01, "learning_rate": 2.4481131548602627e-06, "loss": 0.2822, "step": 6328, "task_loss": 0.16485857963562012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984596523143138, "compression_loss": 0.0, "distillation_loss": 0.24353119730949402, "epoch": 6.01, "learning_rate": 2.443516630356979e-06, "loss": 0.2293, "step": 6329, "task_loss": 0.10098670423030853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984618559587373, "compression_loss": 0.0, "distillation_loss": 0.2624762952327728, "epoch": 6.01, "learning_rate": 2.4389242033475366e-06, "loss": 0.2415, "step": 6330, "task_loss": 0.05250587314367294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984640575004468, "compression_loss": 0.0, "distillation_loss": 0.20973241329193115, "epoch": 6.01, "learning_rate": 2.4343358746661686e-06, "loss": 0.1955, "step": 6331, "task_loss": 0.06739503890275955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984662569404459, "compression_loss": 0.0, "distillation_loss": 0.26178908348083496, "epoch": 6.01, "learning_rate": 2.4297516451463608e-06, "loss": 0.2473, "step": 6332, "task_loss": 0.11708267778158188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984684542797384, "compression_loss": 0.0, "distillation_loss": 0.18669354915618896, "epoch": 6.01, "learning_rate": 2.4251715156208665e-06, "loss": 0.1795, "step": 6333, "task_loss": 0.11463765054941177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984706495193281, "compression_loss": 0.0, "distillation_loss": 0.305093377828598, "epoch": 6.02, "learning_rate": 2.4205954869216922e-06, "loss": 0.289, "step": 6334, "task_loss": 0.14380815625190735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984728426602185, "compression_loss": 0.0, "distillation_loss": 0.15407568216323853, "epoch": 6.02, "learning_rate": 2.416023559880093e-06, "loss": 0.1493, "step": 6335, "task_loss": 0.1060083881020546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984750337034132, "compression_loss": 0.0, "distillation_loss": 0.20316243171691895, "epoch": 6.02, "learning_rate": 2.4114557353265733e-06, "loss": 0.1901, "step": 6336, "task_loss": 0.0729086771607399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798477222649916, "compression_loss": 0.0, "distillation_loss": 0.26141518354415894, "epoch": 6.02, "learning_rate": 2.4068920140909114e-06, "loss": 0.2509, "step": 6337, "task_loss": 0.15607582032680511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984794095007308, "compression_loss": 0.0, "distillation_loss": 0.22481253743171692, "epoch": 6.02, "learning_rate": 2.4023323970021273e-06, "loss": 0.2087, "step": 6338, "task_loss": 0.06392265856266022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984815942568609, "compression_loss": 0.0, "distillation_loss": 0.16369011998176575, "epoch": 6.02, "learning_rate": 2.397776884888489e-06, "loss": 0.1505, "step": 6339, "task_loss": 0.03216647729277611 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984837769193103, "compression_loss": 0.0, "distillation_loss": 0.22370657324790955, "epoch": 6.02, "learning_rate": 2.393225478577532e-06, "loss": 0.2189, "step": 6340, "task_loss": 0.17515668272972107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984859574890824, "compression_loss": 0.0, "distillation_loss": 0.22182436287403107, "epoch": 6.02, "learning_rate": 2.3886781788960477e-06, "loss": 0.2059, "step": 6341, "task_loss": 0.06226883456110954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798488135967181, "compression_loss": 0.0, "distillation_loss": 0.19668897986412048, "epoch": 6.02, "learning_rate": 2.384134986670067e-06, "loss": 0.1828, "step": 6342, "task_loss": 0.0580269992351532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984903123546099, "compression_loss": 0.0, "distillation_loss": 0.16411647200584412, "epoch": 6.02, "learning_rate": 2.3795959027248847e-06, "loss": 0.1578, "step": 6343, "task_loss": 0.10057196021080017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984924866523726, "compression_loss": 0.0, "distillation_loss": 0.21145261824131012, "epoch": 6.02, "learning_rate": 2.3750609278850505e-06, "loss": 0.1984, "step": 6344, "task_loss": 0.08086170256137848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984946588614729, "compression_loss": 0.0, "distillation_loss": 0.12663835287094116, "epoch": 6.03, "learning_rate": 2.3705300629743606e-06, "loss": 0.1277, "step": 6345, "task_loss": 0.13693031668663025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984968289829144, "compression_loss": 0.0, "distillation_loss": 0.19469054043293, "epoch": 6.03, "learning_rate": 2.3660033088158647e-06, "loss": 0.179, "step": 6346, "task_loss": 0.037706997245550156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7984989970177009, "compression_loss": 0.0, "distillation_loss": 0.16001702845096588, "epoch": 6.03, "learning_rate": 2.36148066623188e-06, "loss": 0.1503, "step": 6347, "task_loss": 0.06241552159190178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798501162966836, "compression_loss": 0.0, "distillation_loss": 0.2323281466960907, "epoch": 6.03, "learning_rate": 2.356962136043961e-06, "loss": 0.2186, "step": 6348, "task_loss": 0.09503776580095291 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985033268313234, "compression_loss": 0.0, "distillation_loss": 0.18243101239204407, "epoch": 6.03, "learning_rate": 2.3524477190729144e-06, "loss": 0.1675, "step": 6349, "task_loss": 0.03352750092744827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985054886121667, "compression_loss": 0.0, "distillation_loss": 0.24202725291252136, "epoch": 6.03, "learning_rate": 2.3479374161388124e-06, "loss": 0.2333, "step": 6350, "task_loss": 0.1544978767633438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985076483103698, "compression_loss": 0.0, "distillation_loss": 0.2221883088350296, "epoch": 6.03, "learning_rate": 2.343431228060977e-06, "loss": 0.2053, "step": 6351, "task_loss": 0.05342654138803482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985098059269361, "compression_loss": 0.0, "distillation_loss": 0.17091111838817596, "epoch": 6.03, "learning_rate": 2.3389291556579732e-06, "loss": 0.1579, "step": 6352, "task_loss": 0.0405244342982769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985119614628695, "compression_loss": 0.0, "distillation_loss": 0.25911980867385864, "epoch": 6.03, "learning_rate": 2.334431199747622e-06, "loss": 0.2423, "step": 6353, "task_loss": 0.09104666113853455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985141149191736, "compression_loss": 0.0, "distillation_loss": 0.09606660157442093, "epoch": 6.03, "learning_rate": 2.3299373611470053e-06, "loss": 0.0913, "step": 6354, "task_loss": 0.04837304726243019 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798516266296852, "compression_loss": 0.0, "distillation_loss": 0.15584638714790344, "epoch": 6.04, "learning_rate": 2.3254476406724483e-06, "loss": 0.1464, "step": 6355, "task_loss": 0.06180054694414139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985184155969085, "compression_loss": 0.0, "distillation_loss": 0.11610227823257446, "epoch": 6.04, "learning_rate": 2.3209620391395236e-06, "loss": 0.1073, "step": 6356, "task_loss": 0.027644779533147812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985205628203469, "compression_loss": 0.0, "distillation_loss": 0.16248267889022827, "epoch": 6.04, "learning_rate": 2.316480557363071e-06, "loss": 0.1549, "step": 6357, "task_loss": 0.08620963990688324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985227079681706, "compression_loss": 0.0, "distillation_loss": 0.14060789346694946, "epoch": 6.04, "learning_rate": 2.3120031961571697e-06, "loss": 0.1372, "step": 6358, "task_loss": 0.10676179826259613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985248510413834, "compression_loss": 0.0, "distillation_loss": 0.09738312661647797, "epoch": 6.04, "learning_rate": 2.3075299563351554e-06, "loss": 0.0958, "step": 6359, "task_loss": 0.08182113617658615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798526992040989, "compression_loss": 0.0, "distillation_loss": 0.11283515393733978, "epoch": 6.04, "learning_rate": 2.3030608387096087e-06, "loss": 0.1166, "step": 6360, "task_loss": 0.1502501219511032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798529130967991, "compression_loss": 0.0, "distillation_loss": 0.1467873603105545, "epoch": 6.04, "learning_rate": 2.298595844092377e-06, "loss": 0.1372, "step": 6361, "task_loss": 0.05081057548522949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985312678233933, "compression_loss": 0.0, "distillation_loss": 0.14234556257724762, "epoch": 6.04, "learning_rate": 2.29413497329454e-06, "loss": 0.138, "step": 6362, "task_loss": 0.09874287247657776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985334026081995, "compression_loss": 0.0, "distillation_loss": 0.14752641320228577, "epoch": 6.04, "learning_rate": 2.289678227126432e-06, "loss": 0.1442, "step": 6363, "task_loss": 0.11388550698757172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798535535323413, "compression_loss": 0.0, "distillation_loss": 0.11193514615297318, "epoch": 6.04, "learning_rate": 2.2852256063976537e-06, "loss": 0.1022, "step": 6364, "task_loss": 0.014793351292610168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798537665970038, "compression_loss": 0.0, "distillation_loss": 0.08797906339168549, "epoch": 6.04, "learning_rate": 2.28077711191704e-06, "loss": 0.0801, "step": 6365, "task_loss": 0.009286869317293167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985397945490776, "compression_loss": 0.0, "distillation_loss": 0.09950749576091766, "epoch": 6.05, "learning_rate": 2.276332744492676e-06, "loss": 0.0956, "step": 6366, "task_loss": 0.06009618192911148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985419210615359, "compression_loss": 0.0, "distillation_loss": 0.1407734453678131, "epoch": 6.05, "learning_rate": 2.271892504931905e-06, "loss": 0.1377, "step": 6367, "task_loss": 0.1104123517870903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985440455084164, "compression_loss": 0.0, "distillation_loss": 0.12445957213640213, "epoch": 6.05, "learning_rate": 2.2674563940413283e-06, "loss": 0.1133, "step": 6368, "task_loss": 0.012716732919216156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798546167890723, "compression_loss": 0.0, "distillation_loss": 0.13971540331840515, "epoch": 6.05, "learning_rate": 2.263024412626777e-06, "loss": 0.1393, "step": 6369, "task_loss": 0.13585472106933594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798548288209459, "compression_loss": 0.0, "distillation_loss": 0.09350334107875824, "epoch": 6.05, "learning_rate": 2.2585965614933395e-06, "loss": 0.089, "step": 6370, "task_loss": 0.048799920827150345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985504064656284, "compression_loss": 0.0, "distillation_loss": 0.13269993662834167, "epoch": 6.05, "learning_rate": 2.254172841445365e-06, "loss": 0.1258, "step": 6371, "task_loss": 0.06413581967353821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985525226602348, "compression_loss": 0.0, "distillation_loss": 0.11976936459541321, "epoch": 6.05, "learning_rate": 2.249753253286441e-06, "loss": 0.1226, "step": 6372, "task_loss": 0.14833396673202515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985546367942818, "compression_loss": 0.0, "distillation_loss": 0.11396154761314392, "epoch": 6.05, "learning_rate": 2.2453377978194024e-06, "loss": 0.1091, "step": 6373, "task_loss": 0.06537455320358276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985567488687731, "compression_loss": 0.0, "distillation_loss": 0.10704049468040466, "epoch": 6.05, "learning_rate": 2.2409264758463363e-06, "loss": 0.0984, "step": 6374, "task_loss": 0.02111482247710228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985588588847126, "compression_loss": 0.0, "distillation_loss": 0.1097719818353653, "epoch": 6.05, "learning_rate": 2.2365192881685843e-06, "loss": 0.1024, "step": 6375, "task_loss": 0.03654416650533676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985609668431037, "compression_loss": 0.0, "distillation_loss": 0.10831979662179947, "epoch": 6.06, "learning_rate": 2.232116235586737e-06, "loss": 0.1068, "step": 6376, "task_loss": 0.0928025022149086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985630727449502, "compression_loss": 0.0, "distillation_loss": 0.0745440348982811, "epoch": 6.06, "learning_rate": 2.2277173189006266e-06, "loss": 0.0728, "step": 6377, "task_loss": 0.05667824670672417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985651765912558, "compression_loss": 0.0, "distillation_loss": 0.09890006482601166, "epoch": 6.06, "learning_rate": 2.223322538909339e-06, "loss": 0.0972, "step": 6378, "task_loss": 0.0820925384759903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985672783830241, "compression_loss": 0.0, "distillation_loss": 0.06193868815898895, "epoch": 6.06, "learning_rate": 2.2189318964112054e-06, "loss": 0.0637, "step": 6379, "task_loss": 0.07989262044429779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985693781212589, "compression_loss": 0.0, "distillation_loss": 0.09831628203392029, "epoch": 6.06, "learning_rate": 2.2145453922038093e-06, "loss": 0.1028, "step": 6380, "task_loss": 0.14268594980239868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985714758069639, "compression_loss": 0.0, "distillation_loss": 0.1020318865776062, "epoch": 6.06, "learning_rate": 2.210163027083975e-06, "loss": 0.0982, "step": 6381, "task_loss": 0.06365399807691574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985735714411425, "compression_loss": 0.0, "distillation_loss": 0.12122917175292969, "epoch": 6.06, "learning_rate": 2.205784801847785e-06, "loss": 0.1198, "step": 6382, "task_loss": 0.10688640177249908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985756650247987, "compression_loss": 0.0, "distillation_loss": 0.11438720673322678, "epoch": 6.06, "learning_rate": 2.2014107172905666e-06, "loss": 0.1126, "step": 6383, "task_loss": 0.0968388095498085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798577756558936, "compression_loss": 0.0, "distillation_loss": 0.15790635347366333, "epoch": 6.06, "learning_rate": 2.197040774206882e-06, "loss": 0.1469, "step": 6384, "task_loss": 0.0477466806769371 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985798460445582, "compression_loss": 0.0, "distillation_loss": 0.16549362242221832, "epoch": 6.06, "learning_rate": 2.192674973390568e-06, "loss": 0.1518, "step": 6385, "task_loss": 0.028843212872743607 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985819334826689, "compression_loss": 0.0, "distillation_loss": 0.08308826386928558, "epoch": 6.06, "learning_rate": 2.188313315634688e-06, "loss": 0.0802, "step": 6386, "task_loss": 0.05457150936126709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985840188742719, "compression_loss": 0.0, "distillation_loss": 0.10413433611392975, "epoch": 6.07, "learning_rate": 2.1839558017315547e-06, "loss": 0.0995, "step": 6387, "task_loss": 0.057557813823223114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985861022203707, "compression_loss": 0.0, "distillation_loss": 0.0583919882774353, "epoch": 6.07, "learning_rate": 2.1796024324727297e-06, "loss": 0.0768, "step": 6388, "task_loss": 0.24291850626468658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985881835219691, "compression_loss": 0.0, "distillation_loss": 0.07095431536436081, "epoch": 6.07, "learning_rate": 2.1752532086490333e-06, "loss": 0.0648, "step": 6389, "task_loss": 0.009517394006252289 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985902627800708, "compression_loss": 0.0, "distillation_loss": 0.13259541988372803, "epoch": 6.07, "learning_rate": 2.1709081310505143e-06, "loss": 0.1337, "step": 6390, "task_loss": 0.14401474595069885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985923399956794, "compression_loss": 0.0, "distillation_loss": 0.06342358887195587, "epoch": 6.07, "learning_rate": 2.1665672004664765e-06, "loss": 0.0617, "step": 6391, "task_loss": 0.04634593799710274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985944151697987, "compression_loss": 0.0, "distillation_loss": 0.14910653233528137, "epoch": 6.07, "learning_rate": 2.1622304176854736e-06, "loss": 0.1597, "step": 6392, "task_loss": 0.25551342964172363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985964883034322, "compression_loss": 0.0, "distillation_loss": 0.054450199007987976, "epoch": 6.07, "learning_rate": 2.1578977834953053e-06, "loss": 0.05, "step": 6393, "task_loss": 0.010282203555107117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7985985593975837, "compression_loss": 0.0, "distillation_loss": 0.18467079102993011, "epoch": 6.07, "learning_rate": 2.153569298683017e-06, "loss": 0.1853, "step": 6394, "task_loss": 0.1906275451183319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986006284532569, "compression_loss": 0.0, "distillation_loss": 0.09689827263355255, "epoch": 6.07, "learning_rate": 2.149244964034888e-06, "loss": 0.0951, "step": 6395, "task_loss": 0.07894426584243774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986026954714555, "compression_loss": 0.0, "distillation_loss": 0.043446026742458344, "epoch": 6.07, "learning_rate": 2.1449247803364687e-06, "loss": 0.0397, "step": 6396, "task_loss": 0.006183173507452011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798604760453183, "compression_loss": 0.0, "distillation_loss": 0.05788339674472809, "epoch": 6.08, "learning_rate": 2.140608748372533e-06, "loss": 0.0535, "step": 6397, "task_loss": 0.013864126056432724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986068233994433, "compression_loss": 0.0, "distillation_loss": 0.06160301715135574, "epoch": 6.08, "learning_rate": 2.136296868927104e-06, "loss": 0.0612, "step": 6398, "task_loss": 0.057745207101106644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79860888431124, "compression_loss": 0.0, "distillation_loss": 0.06886729598045349, "epoch": 6.08, "learning_rate": 2.1319891427834664e-06, "loss": 0.0634, "step": 6399, "task_loss": 0.014371348544955254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986109431895767, "compression_loss": 0.0, "distillation_loss": 0.05757201835513115, "epoch": 6.08, "learning_rate": 2.127685570724136e-06, "loss": 0.0528, "step": 6400, "task_loss": 0.009750045835971832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986130000354573, "compression_loss": 0.0, "distillation_loss": 0.1767224222421646, "epoch": 6.08, "learning_rate": 2.1233861535308706e-06, "loss": 0.1751, "step": 6401, "task_loss": 0.1601455807685852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986150548498853, "compression_loss": 0.0, "distillation_loss": 0.08510977774858475, "epoch": 6.08, "learning_rate": 2.1190908919846875e-06, "loss": 0.08, "step": 6402, "task_loss": 0.03425342217087746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986171076338643, "compression_loss": 0.0, "distillation_loss": 0.10737930238246918, "epoch": 6.08, "learning_rate": 2.1147997868658425e-06, "loss": 0.1043, "step": 6403, "task_loss": 0.0761018693447113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986191583883983, "compression_loss": 0.0, "distillation_loss": 0.265876829624176, "epoch": 6.08, "learning_rate": 2.110512838953832e-06, "loss": 0.2627, "step": 6404, "task_loss": 0.23371505737304688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986212071144907, "compression_loss": 0.0, "distillation_loss": 0.0776783674955368, "epoch": 6.08, "learning_rate": 2.1062300490273968e-06, "loss": 0.0801, "step": 6405, "task_loss": 0.10176847875118256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986232538131453, "compression_loss": 0.0, "distillation_loss": 0.04853527992963791, "epoch": 6.08, "learning_rate": 2.1019514178645367e-06, "loss": 0.0567, "step": 6406, "task_loss": 0.13034237921237946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986252984853657, "compression_loss": 0.0, "distillation_loss": 0.04432214796543121, "epoch": 6.08, "learning_rate": 2.0976769462424774e-06, "loss": 0.0406, "step": 6407, "task_loss": 0.007335290312767029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986273411321556, "compression_loss": 0.0, "distillation_loss": 0.05146826058626175, "epoch": 6.09, "learning_rate": 2.0934066349376975e-06, "loss": 0.0476, "step": 6408, "task_loss": 0.012544374912977219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986293817545188, "compression_loss": 0.0, "distillation_loss": 0.08119907975196838, "epoch": 6.09, "learning_rate": 2.0891404847259267e-06, "loss": 0.0824, "step": 6409, "task_loss": 0.09306506812572479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986314203534588, "compression_loss": 0.0, "distillation_loss": 0.06213619187474251, "epoch": 6.09, "learning_rate": 2.08487849638212e-06, "loss": 0.0582, "step": 6410, "task_loss": 0.023112241178750992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986334569299796, "compression_loss": 0.0, "distillation_loss": 0.06045207008719444, "epoch": 6.09, "learning_rate": 2.0806206706804998e-06, "loss": 0.0657, "step": 6411, "task_loss": 0.11293409764766693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986354914850844, "compression_loss": 0.0, "distillation_loss": 0.06079762801527977, "epoch": 6.09, "learning_rate": 2.0763670083945114e-06, "loss": 0.0659, "step": 6412, "task_loss": 0.11149650812149048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986375240197773, "compression_loss": 0.0, "distillation_loss": 0.1584654152393341, "epoch": 6.09, "learning_rate": 2.0721175102968616e-06, "loss": 0.147, "step": 6413, "task_loss": 0.043657951056957245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986395545350619, "compression_loss": 0.0, "distillation_loss": 0.06325273215770721, "epoch": 6.09, "learning_rate": 2.067872177159488e-06, "loss": 0.059, "step": 6414, "task_loss": 0.020560430362820625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986415830319417, "compression_loss": 0.0, "distillation_loss": 0.050118543207645416, "epoch": 6.09, "learning_rate": 2.0636310097535724e-06, "loss": 0.0472, "step": 6415, "task_loss": 0.020842991769313812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986436095114204, "compression_loss": 0.0, "distillation_loss": 0.06810344755649567, "epoch": 6.09, "learning_rate": 2.0593940088495495e-06, "loss": 0.0687, "step": 6416, "task_loss": 0.07414126396179199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798645633974502, "compression_loss": 0.0, "distillation_loss": 0.10694337636232376, "epoch": 6.09, "learning_rate": 2.055161175217091e-06, "loss": 0.1054, "step": 6417, "task_loss": 0.09133277088403702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986476564221897, "compression_loss": 0.0, "distillation_loss": 0.05698537826538086, "epoch": 6.09, "learning_rate": 2.0509325096251004e-06, "loss": 0.0552, "step": 6418, "task_loss": 0.03890334814786911 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986496768554876, "compression_loss": 0.0, "distillation_loss": 0.07449530810117722, "epoch": 6.1, "learning_rate": 2.046708012841744e-06, "loss": 0.072, "step": 6419, "task_loss": 0.049478065222501755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986516952753993, "compression_loss": 0.0, "distillation_loss": 0.07264258712530136, "epoch": 6.1, "learning_rate": 2.042487685634428e-06, "loss": 0.0676, "step": 6420, "task_loss": 0.021874142810702324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986537116829282, "compression_loss": 0.0, "distillation_loss": 0.09519660472869873, "epoch": 6.1, "learning_rate": 2.038271528769786e-06, "loss": 0.1082, "step": 6421, "task_loss": 0.2249455749988556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986557260790783, "compression_loss": 0.0, "distillation_loss": 0.03500593453645706, "epoch": 6.1, "learning_rate": 2.034059543013703e-06, "loss": 0.0322, "step": 6422, "task_loss": 0.006475191563367844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986577384648531, "compression_loss": 0.0, "distillation_loss": 0.0777549147605896, "epoch": 6.1, "learning_rate": 2.029851729131313e-06, "loss": 0.081, "step": 6423, "task_loss": 0.10977138578891754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986597488412565, "compression_loss": 0.0, "distillation_loss": 0.12887020409107208, "epoch": 6.1, "learning_rate": 2.0256480878869834e-06, "loss": 0.1275, "step": 6424, "task_loss": 0.11558264493942261 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986617572092919, "compression_loss": 0.0, "distillation_loss": 0.1287572830915451, "epoch": 6.1, "learning_rate": 2.0214486200443168e-06, "loss": 0.1278, "step": 6425, "task_loss": 0.11913389712572098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986637635699632, "compression_loss": 0.0, "distillation_loss": 0.06908180564641953, "epoch": 6.1, "learning_rate": 2.017253326366181e-06, "loss": 0.065, "step": 6426, "task_loss": 0.027965903282165527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986657679242739, "compression_loss": 0.0, "distillation_loss": 0.09820552915334702, "epoch": 6.1, "learning_rate": 2.0130622076146576e-06, "loss": 0.0986, "step": 6427, "task_loss": 0.10234321653842926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986677702732278, "compression_loss": 0.0, "distillation_loss": 0.0638742744922638, "epoch": 6.1, "learning_rate": 2.0088752645510995e-06, "loss": 0.0609, "step": 6428, "task_loss": 0.03451484069228172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986697706178286, "compression_loss": 0.0, "distillation_loss": 0.18671873211860657, "epoch": 6.11, "learning_rate": 2.004692497936067e-06, "loss": 0.194, "step": 6429, "task_loss": 0.259968101978302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986717689590799, "compression_loss": 0.0, "distillation_loss": 0.04738321155309677, "epoch": 6.11, "learning_rate": 2.0005139085293945e-06, "loss": 0.0452, "step": 6430, "task_loss": 0.025601202622056007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986737652979855, "compression_loss": 0.0, "distillation_loss": 0.05693122744560242, "epoch": 6.11, "learning_rate": 1.996339497090138e-06, "loss": 0.0529, "step": 6431, "task_loss": 0.01685180887579918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986757596355489, "compression_loss": 0.0, "distillation_loss": 0.0986439436674118, "epoch": 6.11, "learning_rate": 1.9921692643765947e-06, "loss": 0.0996, "step": 6432, "task_loss": 0.10801742225885391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798677751972774, "compression_loss": 0.0, "distillation_loss": 0.11213336884975433, "epoch": 6.11, "learning_rate": 1.9880032111463166e-06, "loss": 0.1301, "step": 6433, "task_loss": 0.29207485914230347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986797423106643, "compression_loss": 0.0, "distillation_loss": 0.035904861986637115, "epoch": 6.11, "learning_rate": 1.9838413381560813e-06, "loss": 0.0345, "step": 6434, "task_loss": 0.021603219211101532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986817306502235, "compression_loss": 0.0, "distillation_loss": 0.05209188535809517, "epoch": 6.11, "learning_rate": 1.979683646161909e-06, "loss": 0.0516, "step": 6435, "task_loss": 0.04718216508626938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986837169924554, "compression_loss": 0.0, "distillation_loss": 0.05253394693136215, "epoch": 6.11, "learning_rate": 1.975530135919068e-06, "loss": 0.0616, "step": 6436, "task_loss": 0.14322909712791443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986857013383637, "compression_loss": 0.0, "distillation_loss": 0.0527234822511673, "epoch": 6.11, "learning_rate": 1.9713808081820716e-06, "loss": 0.0584, "step": 6437, "task_loss": 0.1097893938422203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986876836889519, "compression_loss": 0.0, "distillation_loss": 0.036524612456560135, "epoch": 6.11, "learning_rate": 1.9672356637046584e-06, "loss": 0.0335, "step": 6438, "task_loss": 0.00587356835603714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986896640452238, "compression_loss": 0.0, "distillation_loss": 0.0352536141872406, "epoch": 6.11, "learning_rate": 1.9630947032398067e-06, "loss": 0.0478, "step": 6439, "task_loss": 0.16093014180660248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798691642408183, "compression_loss": 0.0, "distillation_loss": 0.04243883490562439, "epoch": 6.12, "learning_rate": 1.9589579275397562e-06, "loss": 0.044, "step": 6440, "task_loss": 0.058299075812101364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986936187788334, "compression_loss": 0.0, "distillation_loss": 0.1163100153207779, "epoch": 6.12, "learning_rate": 1.9548253373559646e-06, "loss": 0.1117, "step": 6441, "task_loss": 0.07047397643327713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986955931581784, "compression_loss": 0.0, "distillation_loss": 0.0603814423084259, "epoch": 6.12, "learning_rate": 1.9506969334391332e-06, "loss": 0.0554, "step": 6442, "task_loss": 0.010571654886007309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986975655472218, "compression_loss": 0.0, "distillation_loss": 0.03320575878024101, "epoch": 6.12, "learning_rate": 1.9465727165392134e-06, "loss": 0.0305, "step": 6443, "task_loss": 0.006414549425244331 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7986995359469674, "compression_loss": 0.0, "distillation_loss": 0.025436367839574814, "epoch": 6.12, "learning_rate": 1.942452687405383e-06, "loss": 0.0307, "step": 6444, "task_loss": 0.07796012610197067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987015043584187, "compression_loss": 0.0, "distillation_loss": 0.04995856434106827, "epoch": 6.12, "learning_rate": 1.9383368467860734e-06, "loss": 0.0519, "step": 6445, "task_loss": 0.06982216984033585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987034707825795, "compression_loss": 0.0, "distillation_loss": 0.028508171439170837, "epoch": 6.12, "learning_rate": 1.9342251954289346e-06, "loss": 0.0264, "step": 6446, "task_loss": 0.00761030986905098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987054352204533, "compression_loss": 0.0, "distillation_loss": 0.06900826841592789, "epoch": 6.12, "learning_rate": 1.930117734080883e-06, "loss": 0.0651, "step": 6447, "task_loss": 0.029789365828037262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987073976730441, "compression_loss": 0.0, "distillation_loss": 0.07841695845127106, "epoch": 6.12, "learning_rate": 1.926014463488049e-06, "loss": 0.0737, "step": 6448, "task_loss": 0.031337086111307144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987093581413554, "compression_loss": 0.0, "distillation_loss": 0.03884424641728401, "epoch": 6.12, "learning_rate": 1.921915384395809e-06, "loss": 0.0368, "step": 6449, "task_loss": 0.018084675073623657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987113166263907, "compression_loss": 0.0, "distillation_loss": 0.03938658908009529, "epoch": 6.13, "learning_rate": 1.917820497548789e-06, "loss": 0.048, "step": 6450, "task_loss": 0.12579363584518433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987132731291541, "compression_loss": 0.0, "distillation_loss": 0.0376831591129303, "epoch": 6.13, "learning_rate": 1.913729803690839e-06, "loss": 0.0408, "step": 6451, "task_loss": 0.06835294514894485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987152276506488, "compression_loss": 0.0, "distillation_loss": 0.07034672051668167, "epoch": 6.13, "learning_rate": 1.9096433035650565e-06, "loss": 0.0667, "step": 6452, "task_loss": 0.03402462229132652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987171801918789, "compression_loss": 0.0, "distillation_loss": 0.06073524430394173, "epoch": 6.13, "learning_rate": 1.9055609979137634e-06, "loss": 0.0568, "step": 6453, "task_loss": 0.020950062200427055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987191307538479, "compression_loss": 0.0, "distillation_loss": 0.08777901530265808, "epoch": 6.13, "learning_rate": 1.9014828874785478e-06, "loss": 0.0853, "step": 6454, "task_loss": 0.0634586289525032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987210793375594, "compression_loss": 0.0, "distillation_loss": 0.054110340774059296, "epoch": 6.13, "learning_rate": 1.8974089730002087e-06, "loss": 0.0514, "step": 6455, "task_loss": 0.026968229562044144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987230259440173, "compression_loss": 0.0, "distillation_loss": 0.03255057334899902, "epoch": 6.13, "learning_rate": 1.89333925521879e-06, "loss": 0.0306, "step": 6456, "task_loss": 0.012770043686032295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987249705742251, "compression_loss": 0.0, "distillation_loss": 0.05460744723677635, "epoch": 6.13, "learning_rate": 1.8892737348735812e-06, "loss": 0.0586, "step": 6457, "task_loss": 0.09443493187427521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987269132291865, "compression_loss": 0.0, "distillation_loss": 0.10842868685722351, "epoch": 6.13, "learning_rate": 1.8852124127031022e-06, "loss": 0.1172, "step": 6458, "task_loss": 0.19569729268550873 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987288539099053, "compression_loss": 0.0, "distillation_loss": 0.04304853081703186, "epoch": 6.13, "learning_rate": 1.8811552894451107e-06, "loss": 0.042, "step": 6459, "task_loss": 0.03260982409119606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987307926173851, "compression_loss": 0.0, "distillation_loss": 0.07537424564361572, "epoch": 6.13, "learning_rate": 1.877102365836597e-06, "loss": 0.0735, "step": 6460, "task_loss": 0.056225549429655075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987327293526295, "compression_loss": 0.0, "distillation_loss": 0.02507023513317108, "epoch": 6.14, "learning_rate": 1.8730536426138034e-06, "loss": 0.023, "step": 6461, "task_loss": 0.004859650507569313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987346641166423, "compression_loss": 0.0, "distillation_loss": 0.06949448585510254, "epoch": 6.14, "learning_rate": 1.8690091205121941e-06, "loss": 0.0698, "step": 6462, "task_loss": 0.07217264175415039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987365969104272, "compression_loss": 0.0, "distillation_loss": 0.04962468147277832, "epoch": 6.14, "learning_rate": 1.8649688002664756e-06, "loss": 0.048, "step": 6463, "task_loss": 0.033658094704151154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987385277349879, "compression_loss": 0.0, "distillation_loss": 0.02775590494275093, "epoch": 6.14, "learning_rate": 1.8609326826106e-06, "loss": 0.0255, "step": 6464, "task_loss": 0.00518425740301609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987404565913279, "compression_loss": 0.0, "distillation_loss": 0.029499584808945656, "epoch": 6.14, "learning_rate": 1.8569007682777417e-06, "loss": 0.0272, "step": 6465, "task_loss": 0.006057474762201309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798742383480451, "compression_loss": 0.0, "distillation_loss": 0.04246009886264801, "epoch": 6.14, "learning_rate": 1.8528730580003178e-06, "loss": 0.0466, "step": 6466, "task_loss": 0.08337102830410004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987443084033609, "compression_loss": 0.0, "distillation_loss": 0.08518815040588379, "epoch": 6.14, "learning_rate": 1.8488495525099735e-06, "loss": 0.0868, "step": 6467, "task_loss": 0.10114063322544098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987462313610613, "compression_loss": 0.0, "distillation_loss": 0.04680711403489113, "epoch": 6.14, "learning_rate": 1.8448302525376132e-06, "loss": 0.0431, "step": 6468, "task_loss": 0.009998075664043427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987481523545558, "compression_loss": 0.0, "distillation_loss": 0.07749959826469421, "epoch": 6.14, "learning_rate": 1.8408151588133498e-06, "loss": 0.0829, "step": 6469, "task_loss": 0.13170292973518372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987500713848481, "compression_loss": 0.0, "distillation_loss": 0.10043874382972717, "epoch": 6.14, "learning_rate": 1.8368042720665446e-06, "loss": 0.1057, "step": 6470, "task_loss": 0.15264543890953064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987519884529419, "compression_loss": 0.0, "distillation_loss": 0.10912171006202698, "epoch": 6.15, "learning_rate": 1.8327975930258035e-06, "loss": 0.1048, "step": 6471, "task_loss": 0.0659012421965599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798753903559841, "compression_loss": 0.0, "distillation_loss": 0.01361011527478695, "epoch": 6.15, "learning_rate": 1.8287951224189553e-06, "loss": 0.0198, "step": 6472, "task_loss": 0.07584784179925919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987558167065488, "compression_loss": 0.0, "distillation_loss": 0.14504070580005646, "epoch": 6.15, "learning_rate": 1.8247968609730686e-06, "loss": 0.1404, "step": 6473, "task_loss": 0.09844270348548889 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987577278940693, "compression_loss": 0.0, "distillation_loss": 0.07923628389835358, "epoch": 6.15, "learning_rate": 1.8208028094144375e-06, "loss": 0.0724, "step": 6474, "task_loss": 0.011291364207863808 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798759637123406, "compression_loss": 0.0, "distillation_loss": 0.023012623190879822, "epoch": 6.15, "learning_rate": 1.8168129684686148e-06, "loss": 0.0308, "step": 6475, "task_loss": 0.10104191303253174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987615443955627, "compression_loss": 0.0, "distillation_loss": 0.037004388868808746, "epoch": 6.15, "learning_rate": 1.8128273388603679e-06, "loss": 0.0342, "step": 6476, "task_loss": 0.008559944108128548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987634497115429, "compression_loss": 0.0, "distillation_loss": 0.02188132330775261, "epoch": 6.15, "learning_rate": 1.808845921313701e-06, "loss": 0.0206, "step": 6477, "task_loss": 0.008831024169921875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987653530723503, "compression_loss": 0.0, "distillation_loss": 0.02209128439426422, "epoch": 6.15, "learning_rate": 1.8048687165518662e-06, "loss": 0.0204, "step": 6478, "task_loss": 0.004884377121925354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987672544789888, "compression_loss": 0.0, "distillation_loss": 0.026295151561498642, "epoch": 6.15, "learning_rate": 1.800895725297333e-06, "loss": 0.0385, "step": 6479, "task_loss": 0.14823564887046814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798769153932462, "compression_loss": 0.0, "distillation_loss": 0.02808169648051262, "epoch": 6.15, "learning_rate": 1.7969269482718265e-06, "loss": 0.0283, "step": 6480, "task_loss": 0.0304415300488472 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987710514337734, "compression_loss": 0.0, "distillation_loss": 0.09501353651285172, "epoch": 6.15, "learning_rate": 1.7929623861962785e-06, "loss": 0.1057, "step": 6481, "task_loss": 0.2016463577747345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987729469839269, "compression_loss": 0.0, "distillation_loss": 0.05659741908311844, "epoch": 6.16, "learning_rate": 1.789002039790888e-06, "loss": 0.0551, "step": 6482, "task_loss": 0.041581884026527405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798774840583926, "compression_loss": 0.0, "distillation_loss": 0.02717326581478119, "epoch": 6.16, "learning_rate": 1.78504590977506e-06, "loss": 0.0309, "step": 6483, "task_loss": 0.06475157290697098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987767322347745, "compression_loss": 0.0, "distillation_loss": 0.10455787181854248, "epoch": 6.16, "learning_rate": 1.7810939968674418e-06, "loss": 0.1024, "step": 6484, "task_loss": 0.08274058252573013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987786219374761, "compression_loss": 0.0, "distillation_loss": 0.06307333707809448, "epoch": 6.16, "learning_rate": 1.7771463017859287e-06, "loss": 0.0622, "step": 6485, "task_loss": 0.05453290790319443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987805096930344, "compression_loss": 0.0, "distillation_loss": 0.05282985419034958, "epoch": 6.16, "learning_rate": 1.77320282524763e-06, "loss": 0.0512, "step": 6486, "task_loss": 0.03684225305914879 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987823955024531, "compression_loss": 0.0, "distillation_loss": 0.04597539082169533, "epoch": 6.16, "learning_rate": 1.7692635679688986e-06, "loss": 0.0523, "step": 6487, "task_loss": 0.10876553505659103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987842793667359, "compression_loss": 0.0, "distillation_loss": 0.08502782136201859, "epoch": 6.16, "learning_rate": 1.7653285306653194e-06, "loss": 0.0824, "step": 6488, "task_loss": 0.05903245136141777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987861612868866, "compression_loss": 0.0, "distillation_loss": 0.16358347237110138, "epoch": 6.16, "learning_rate": 1.7613977140517158e-06, "loss": 0.1704, "step": 6489, "task_loss": 0.23221097886562347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987880412639087, "compression_loss": 0.0, "distillation_loss": 0.05940496176481247, "epoch": 6.16, "learning_rate": 1.7574711188421356e-06, "loss": 0.0593, "step": 6490, "task_loss": 0.05786725506186485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987899192988059, "compression_loss": 0.0, "distillation_loss": 0.05588078126311302, "epoch": 6.16, "learning_rate": 1.7535487457498583e-06, "loss": 0.0596, "step": 6491, "task_loss": 0.09317133575677872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798791795392582, "compression_loss": 0.0, "distillation_loss": 0.0370253287255764, "epoch": 6.17, "learning_rate": 1.7496305954874142e-06, "loss": 0.0443, "step": 6492, "task_loss": 0.10931254923343658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987936695462405, "compression_loss": 0.0, "distillation_loss": 0.03799361735582352, "epoch": 6.17, "learning_rate": 1.7457166687665449e-06, "loss": 0.045, "step": 6493, "task_loss": 0.1080305278301239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987955417607854, "compression_loss": 0.0, "distillation_loss": 0.0513494536280632, "epoch": 6.17, "learning_rate": 1.7418069662982344e-06, "loss": 0.0493, "step": 6494, "task_loss": 0.03114178031682968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987974120372201, "compression_loss": 0.0, "distillation_loss": 0.0808219462633133, "epoch": 6.17, "learning_rate": 1.7379014887927064e-06, "loss": 0.0772, "step": 6495, "task_loss": 0.044266168028116226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7987992803765482, "compression_loss": 0.0, "distillation_loss": 0.19209139049053192, "epoch": 6.17, "learning_rate": 1.734000236959399e-06, "loss": 0.1894, "step": 6496, "task_loss": 0.16527491807937622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988011467797738, "compression_loss": 0.0, "distillation_loss": 0.033167578279972076, "epoch": 6.17, "learning_rate": 1.7301032115070003e-06, "loss": 0.0421, "step": 6497, "task_loss": 0.12249952554702759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988030112479001, "compression_loss": 0.0, "distillation_loss": 0.061443835496902466, "epoch": 6.17, "learning_rate": 1.7262104131434226e-06, "loss": 0.0669, "step": 6498, "task_loss": 0.11603046953678131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988048737819311, "compression_loss": 0.0, "distillation_loss": 0.020476579666137695, "epoch": 6.17, "learning_rate": 1.722321842575811e-06, "loss": 0.0188, "step": 6499, "task_loss": 0.0034268908202648163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988067343828704, "compression_loss": 0.0, "distillation_loss": 0.0780348926782608, "epoch": 6.17, "learning_rate": 1.7184375005105474e-06, "loss": 0.0799, "step": 6500, "task_loss": 0.09668619930744171 }, { "epoch": 6.17, "eval_accuracy": 0.8876146788990825, "eval_loss": 0.4523507356643677, "eval_runtime": 18.245, "eval_samples_per_second": 47.794, "eval_steps_per_second": 5.974, "step": 6500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988085930517217, "compression_loss": 0.0, "distillation_loss": 0.019627831876277924, "epoch": 6.17, "learning_rate": 1.714557387653229e-06, "loss": 0.0234, "step": 6501, "task_loss": 0.0573212131857872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988104497894886, "compression_loss": 0.0, "distillation_loss": 0.04171549528837204, "epoch": 6.17, "learning_rate": 1.710681504708711e-06, "loss": 0.0404, "step": 6502, "task_loss": 0.028218252584338188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988123045971749, "compression_loss": 0.0, "distillation_loss": 0.03882744908332825, "epoch": 6.18, "learning_rate": 1.7068098523810611e-06, "loss": 0.0428, "step": 6503, "task_loss": 0.07813085615634918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988141574757841, "compression_loss": 0.0, "distillation_loss": 0.028121117502450943, "epoch": 6.18, "learning_rate": 1.7029424313735776e-06, "loss": 0.0338, "step": 6504, "task_loss": 0.08514552563428879 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988160084263201, "compression_loss": 0.0, "distillation_loss": 0.0896795317530632, "epoch": 6.18, "learning_rate": 1.6990792423888013e-06, "loss": 0.1013, "step": 6505, "task_loss": 0.20579922199249268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988178574497864, "compression_loss": 0.0, "distillation_loss": 0.09015851467847824, "epoch": 6.18, "learning_rate": 1.6952202861285044e-06, "loss": 0.0904, "step": 6506, "task_loss": 0.09299979358911514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988197045471869, "compression_loss": 0.0, "distillation_loss": 0.21845272183418274, "epoch": 6.18, "learning_rate": 1.6913655632936787e-06, "loss": 0.2116, "step": 6507, "task_loss": 0.1496710181236267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798821549719525, "compression_loss": 0.0, "distillation_loss": 0.05457516014575958, "epoch": 6.18, "learning_rate": 1.6875150745845503e-06, "loss": 0.0568, "step": 6508, "task_loss": 0.07647211849689484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988233929678046, "compression_loss": 0.0, "distillation_loss": 0.059655383229255676, "epoch": 6.18, "learning_rate": 1.6836688207005846e-06, "loss": 0.0653, "step": 6509, "task_loss": 0.11646562069654465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988252342930292, "compression_loss": 0.0, "distillation_loss": 0.06423240900039673, "epoch": 6.18, "learning_rate": 1.6798268023404727e-06, "loss": 0.0602, "step": 6510, "task_loss": 0.023659339174628258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988270736962028, "compression_loss": 0.0, "distillation_loss": 0.06197018176317215, "epoch": 6.18, "learning_rate": 1.6759890202021289e-06, "loss": 0.0629, "step": 6511, "task_loss": 0.07137607038021088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988289111783287, "compression_loss": 0.0, "distillation_loss": 0.026029150933027267, "epoch": 6.18, "learning_rate": 1.6721554749827116e-06, "loss": 0.0309, "step": 6512, "task_loss": 0.07451260089874268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988307467404108, "compression_loss": 0.0, "distillation_loss": 0.04224439710378647, "epoch": 6.19, "learning_rate": 1.6683261673786033e-06, "loss": 0.052, "step": 6513, "task_loss": 0.14017590880393982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988325803834527, "compression_loss": 0.0, "distillation_loss": 0.06829048693180084, "epoch": 6.19, "learning_rate": 1.664501098085408e-06, "loss": 0.0639, "step": 6514, "task_loss": 0.024488359689712524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988344121084583, "compression_loss": 0.0, "distillation_loss": 0.08032073080539703, "epoch": 6.19, "learning_rate": 1.6606802677979732e-06, "loss": 0.0757, "step": 6515, "task_loss": 0.03441820666193962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988362419164309, "compression_loss": 0.0, "distillation_loss": 0.025070223957300186, "epoch": 6.19, "learning_rate": 1.656863677210374e-06, "loss": 0.0523, "step": 6516, "task_loss": 0.2975619435310364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988380698083745, "compression_loss": 0.0, "distillation_loss": 0.035705260932445526, "epoch": 6.19, "learning_rate": 1.6530513270159116e-06, "loss": 0.0358, "step": 6517, "task_loss": 0.03673187643289566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988398957852926, "compression_loss": 0.0, "distillation_loss": 0.09474039822816849, "epoch": 6.19, "learning_rate": 1.6492432179071094e-06, "loss": 0.0979, "step": 6518, "task_loss": 0.12658804655075073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798841719848189, "compression_loss": 0.0, "distillation_loss": 0.057927001267671585, "epoch": 6.19, "learning_rate": 1.645439350575742e-06, "loss": 0.0682, "step": 6519, "task_loss": 0.16106078028678894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988435419980673, "compression_loss": 0.0, "distillation_loss": 0.05671095848083496, "epoch": 6.19, "learning_rate": 1.6416397257127902e-06, "loss": 0.0594, "step": 6520, "task_loss": 0.08323853462934494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988453622359312, "compression_loss": 0.0, "distillation_loss": 0.06431238353252411, "epoch": 6.19, "learning_rate": 1.637844344008474e-06, "loss": 0.0647, "step": 6521, "task_loss": 0.06814312934875488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988471805627844, "compression_loss": 0.0, "distillation_loss": 0.03375518321990967, "epoch": 6.19, "learning_rate": 1.6340532061522474e-06, "loss": 0.0327, "step": 6522, "task_loss": 0.022706888616085052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988489969796306, "compression_loss": 0.0, "distillation_loss": 0.052014727145433426, "epoch": 6.19, "learning_rate": 1.6302663128327927e-06, "loss": 0.0496, "step": 6523, "task_loss": 0.028193766251206398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988508114874735, "compression_loss": 0.0, "distillation_loss": 0.10802538692951202, "epoch": 6.2, "learning_rate": 1.62648366473801e-06, "loss": 0.104, "step": 6524, "task_loss": 0.06763241440057755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988526240873166, "compression_loss": 0.0, "distillation_loss": 0.05575351417064667, "epoch": 6.2, "learning_rate": 1.6227052625550327e-06, "loss": 0.0649, "step": 6525, "task_loss": 0.14689795672893524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988544347801638, "compression_loss": 0.0, "distillation_loss": 0.08440142869949341, "epoch": 6.2, "learning_rate": 1.6189311069702367e-06, "loss": 0.0887, "step": 6526, "task_loss": 0.12698373198509216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988562435670188, "compression_loss": 0.0, "distillation_loss": 0.04269465059041977, "epoch": 6.2, "learning_rate": 1.61516119866921e-06, "loss": 0.047, "step": 6527, "task_loss": 0.08578742295503616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798858050448885, "compression_loss": 0.0, "distillation_loss": 0.024140551686286926, "epoch": 6.2, "learning_rate": 1.6113955383367685e-06, "loss": 0.024, "step": 6528, "task_loss": 0.022829843685030937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988598554267664, "compression_loss": 0.0, "distillation_loss": 0.033472198992967606, "epoch": 6.2, "learning_rate": 1.6076341266569734e-06, "loss": 0.0353, "step": 6529, "task_loss": 0.05162709578871727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988616585016666, "compression_loss": 0.0, "distillation_loss": 0.04639057815074921, "epoch": 6.2, "learning_rate": 1.6038769643130973e-06, "loss": 0.0581, "step": 6530, "task_loss": 0.1631542444229126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988634596745892, "compression_loss": 0.0, "distillation_loss": 0.041402582079172134, "epoch": 6.2, "learning_rate": 1.600124051987645e-06, "loss": 0.0542, "step": 6531, "task_loss": 0.16910916566848755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988652589465378, "compression_loss": 0.0, "distillation_loss": 0.045691799372434616, "epoch": 6.2, "learning_rate": 1.5963753903623535e-06, "loss": 0.0528, "step": 6532, "task_loss": 0.11651049554347992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988670563185163, "compression_loss": 0.0, "distillation_loss": 0.03516625612974167, "epoch": 6.2, "learning_rate": 1.592630980118187e-06, "loss": 0.034, "step": 6533, "task_loss": 0.02300405688583851 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988688517915282, "compression_loss": 0.0, "distillation_loss": 0.039826877415180206, "epoch": 6.21, "learning_rate": 1.5888908219353349e-06, "loss": 0.039, "step": 6534, "task_loss": 0.03174047917127609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988706453665774, "compression_loss": 0.0, "distillation_loss": 0.036477230489254, "epoch": 6.21, "learning_rate": 1.5851549164932116e-06, "loss": 0.034, "step": 6535, "task_loss": 0.011402864009141922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988724370446674, "compression_loss": 0.0, "distillation_loss": 0.05315593630075455, "epoch": 6.21, "learning_rate": 1.5814232644704691e-06, "loss": 0.0655, "step": 6536, "task_loss": 0.17702096700668335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988742268268018, "compression_loss": 0.0, "distillation_loss": 0.1766928732395172, "epoch": 6.21, "learning_rate": 1.5776958665449731e-06, "loss": 0.1711, "step": 6537, "task_loss": 0.12121965736150742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988760147139845, "compression_loss": 0.0, "distillation_loss": 0.04658925160765648, "epoch": 6.21, "learning_rate": 1.5739727233938239e-06, "loss": 0.0449, "step": 6538, "task_loss": 0.029311183840036392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988778007072191, "compression_loss": 0.0, "distillation_loss": 0.038374051451683044, "epoch": 6.21, "learning_rate": 1.5702538356933555e-06, "loss": 0.0453, "step": 6539, "task_loss": 0.1074916422367096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988795848075092, "compression_loss": 0.0, "distillation_loss": 0.026442725211381912, "epoch": 6.21, "learning_rate": 1.5665392041191107e-06, "loss": 0.0276, "step": 6540, "task_loss": 0.03780139237642288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988813670158587, "compression_loss": 0.0, "distillation_loss": 0.025853261351585388, "epoch": 6.21, "learning_rate": 1.5628288293458804e-06, "loss": 0.0292, "step": 6541, "task_loss": 0.058958619832992554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798883147333271, "compression_loss": 0.0, "distillation_loss": 0.06454972922801971, "epoch": 6.21, "learning_rate": 1.5591227120476643e-06, "loss": 0.0724, "step": 6542, "task_loss": 0.1430365890264511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79888492576075, "compression_loss": 0.0, "distillation_loss": 0.025234917178750038, "epoch": 6.21, "learning_rate": 1.5554208528977044e-06, "loss": 0.0235, "step": 6543, "task_loss": 0.007690908387303352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988867022992993, "compression_loss": 0.0, "distillation_loss": 0.06024536117911339, "epoch": 6.21, "learning_rate": 1.5517232525684571e-06, "loss": 0.0693, "step": 6544, "task_loss": 0.15101151168346405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988884769499225, "compression_loss": 0.0, "distillation_loss": 0.04414193704724312, "epoch": 6.22, "learning_rate": 1.548029911731605e-06, "loss": 0.0416, "step": 6545, "task_loss": 0.019007055088877678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988902497136234, "compression_loss": 0.0, "distillation_loss": 0.028746753931045532, "epoch": 6.22, "learning_rate": 1.5443408310580692e-06, "loss": 0.0323, "step": 6546, "task_loss": 0.06452102959156036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988920205914057, "compression_loss": 0.0, "distillation_loss": 0.02359924092888832, "epoch": 6.22, "learning_rate": 1.5406560112179864e-06, "loss": 0.0291, "step": 6547, "task_loss": 0.07876047492027283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988937895842729, "compression_loss": 0.0, "distillation_loss": 0.020535187795758247, "epoch": 6.22, "learning_rate": 1.5369754528807152e-06, "loss": 0.019, "step": 6548, "task_loss": 0.005380989983677864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798895556693229, "compression_loss": 0.0, "distillation_loss": 0.04782063886523247, "epoch": 6.22, "learning_rate": 1.5332991567148515e-06, "loss": 0.058, "step": 6549, "task_loss": 0.1497877985239029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988973219192774, "compression_loss": 0.0, "distillation_loss": 0.024879930540919304, "epoch": 6.22, "learning_rate": 1.5296271233882165e-06, "loss": 0.0321, "step": 6550, "task_loss": 0.09727062284946442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7988990852634219, "compression_loss": 0.0, "distillation_loss": 0.040715500712394714, "epoch": 6.22, "learning_rate": 1.5259593535678491e-06, "loss": 0.0399, "step": 6551, "task_loss": 0.03258955478668213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989008467266661, "compression_loss": 0.0, "distillation_loss": 0.03168099373579025, "epoch": 6.22, "learning_rate": 1.522295847920019e-06, "loss": 0.0492, "step": 6552, "task_loss": 0.20730939507484436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989026063100138, "compression_loss": 0.0, "distillation_loss": 0.08038985729217529, "epoch": 6.22, "learning_rate": 1.5186366071102133e-06, "loss": 0.0947, "step": 6553, "task_loss": 0.22315886616706848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989043640144685, "compression_loss": 0.0, "distillation_loss": 0.022372357547283173, "epoch": 6.22, "learning_rate": 1.5149816318031584e-06, "loss": 0.0208, "step": 6554, "task_loss": 0.00664399191737175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989061198410341, "compression_loss": 0.0, "distillation_loss": 0.06297887861728668, "epoch": 6.23, "learning_rate": 1.5113309226627986e-06, "loss": 0.0727, "step": 6555, "task_loss": 0.16039499640464783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989078737907143, "compression_loss": 0.0, "distillation_loss": 0.06179758906364441, "epoch": 6.23, "learning_rate": 1.5076844803522922e-06, "loss": 0.0664, "step": 6556, "task_loss": 0.10753493010997772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989096258645125, "compression_loss": 0.0, "distillation_loss": 0.025135168805718422, "epoch": 6.23, "learning_rate": 1.5040423055340396e-06, "loss": 0.0317, "step": 6557, "task_loss": 0.09094193577766418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989113760634325, "compression_loss": 0.0, "distillation_loss": 0.045239001512527466, "epoch": 6.23, "learning_rate": 1.5004043988696647e-06, "loss": 0.0541, "step": 6558, "task_loss": 0.13419125974178314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989131243884782, "compression_loss": 0.0, "distillation_loss": 0.0402006097137928, "epoch": 6.23, "learning_rate": 1.4967707610200083e-06, "loss": 0.0407, "step": 6559, "task_loss": 0.0453285276889801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798914870840653, "compression_loss": 0.0, "distillation_loss": 0.043260738253593445, "epoch": 6.23, "learning_rate": 1.493141392645131e-06, "loss": 0.0406, "step": 6560, "task_loss": 0.01651701144874096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989166154209608, "compression_loss": 0.0, "distillation_loss": 0.1227022111415863, "epoch": 6.23, "learning_rate": 1.4895162944043334e-06, "loss": 0.1221, "step": 6561, "task_loss": 0.11656280606985092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989183581304051, "compression_loss": 0.0, "distillation_loss": 0.01965763419866562, "epoch": 6.23, "learning_rate": 1.4858954669561275e-06, "loss": 0.0259, "step": 6562, "task_loss": 0.08250312507152557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989200989699897, "compression_loss": 0.0, "distillation_loss": 0.03354266285896301, "epoch": 6.23, "learning_rate": 1.4822789109582513e-06, "loss": 0.031, "step": 6563, "task_loss": 0.00812080129981041 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989218379407182, "compression_loss": 0.0, "distillation_loss": 0.10104814171791077, "epoch": 6.23, "learning_rate": 1.478666627067679e-06, "loss": 0.0967, "step": 6564, "task_loss": 0.05804811045527458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989235750435945, "compression_loss": 0.0, "distillation_loss": 0.07657065242528915, "epoch": 6.23, "learning_rate": 1.4750586159405915e-06, "loss": 0.0822, "step": 6565, "task_loss": 0.13330647349357605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989253102796219, "compression_loss": 0.0, "distillation_loss": 0.018394935876131058, "epoch": 6.24, "learning_rate": 1.4714548782324034e-06, "loss": 0.0265, "step": 6566, "task_loss": 0.09906549006700516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989270436498043, "compression_loss": 0.0, "distillation_loss": 0.051937442272901535, "epoch": 6.24, "learning_rate": 1.467855414597749e-06, "loss": 0.0475, "step": 6567, "task_loss": 0.007465232163667679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989287751551454, "compression_loss": 0.0, "distillation_loss": 0.049969661980867386, "epoch": 6.24, "learning_rate": 1.4642602256904946e-06, "loss": 0.0474, "step": 6568, "task_loss": 0.023928040638566017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798930504796649, "compression_loss": 0.0, "distillation_loss": 0.03371240198612213, "epoch": 6.24, "learning_rate": 1.4606693121637206e-06, "loss": 0.0316, "step": 6569, "task_loss": 0.012854812666773796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989322325753184, "compression_loss": 0.0, "distillation_loss": 0.2045125961303711, "epoch": 6.24, "learning_rate": 1.457082674669727e-06, "loss": 0.2015, "step": 6570, "task_loss": 0.17422333359718323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989339584921576, "compression_loss": 0.0, "distillation_loss": 0.02525508962571621, "epoch": 6.24, "learning_rate": 1.4535003138600566e-06, "loss": 0.0305, "step": 6571, "task_loss": 0.07814519852399826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989356825481703, "compression_loss": 0.0, "distillation_loss": 0.07085863500833511, "epoch": 6.24, "learning_rate": 1.4499222303854532e-06, "loss": 0.0728, "step": 6572, "task_loss": 0.09030604362487793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989374047443599, "compression_loss": 0.0, "distillation_loss": 0.09062013030052185, "epoch": 6.24, "learning_rate": 1.4463484248958908e-06, "loss": 0.0856, "step": 6573, "task_loss": 0.04006872698664665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989391250817305, "compression_loss": 0.0, "distillation_loss": 0.0634981244802475, "epoch": 6.24, "learning_rate": 1.4427788980405728e-06, "loss": 0.0603, "step": 6574, "task_loss": 0.031479597091674805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989408435612854, "compression_loss": 0.0, "distillation_loss": 0.0360356867313385, "epoch": 6.24, "learning_rate": 1.4392136504679244e-06, "loss": 0.0351, "step": 6575, "task_loss": 0.026443321257829666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989425601840284, "compression_loss": 0.0, "distillation_loss": 0.0444665402173996, "epoch": 6.25, "learning_rate": 1.4356526828255862e-06, "loss": 0.0505, "step": 6576, "task_loss": 0.10513627529144287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989442749509632, "compression_loss": 0.0, "distillation_loss": 0.026376407593488693, "epoch": 6.25, "learning_rate": 1.432095995760424e-06, "loss": 0.034, "step": 6577, "task_loss": 0.10233157873153687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989459878630936, "compression_loss": 0.0, "distillation_loss": 0.04820052161812782, "epoch": 6.25, "learning_rate": 1.4285435899185295e-06, "loss": 0.0516, "step": 6578, "task_loss": 0.08200520277023315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989476989214231, "compression_loss": 0.0, "distillation_loss": 0.06717506051063538, "epoch": 6.25, "learning_rate": 1.424995465945214e-06, "loss": 0.0792, "step": 6579, "task_loss": 0.1877632588148117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989494081269555, "compression_loss": 0.0, "distillation_loss": 0.05002850666642189, "epoch": 6.25, "learning_rate": 1.4214516244850068e-06, "loss": 0.0534, "step": 6580, "task_loss": 0.08330188691616058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989511154806944, "compression_loss": 0.0, "distillation_loss": 0.055970218032598495, "epoch": 6.25, "learning_rate": 1.417912066181673e-06, "loss": 0.0618, "step": 6581, "task_loss": 0.11391516029834747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989528209836435, "compression_loss": 0.0, "distillation_loss": 0.03707553446292877, "epoch": 6.25, "learning_rate": 1.414376791678182e-06, "loss": 0.045, "step": 6582, "task_loss": 0.11639431864023209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989545246368066, "compression_loss": 0.0, "distillation_loss": 0.09972971677780151, "epoch": 6.25, "learning_rate": 1.4108458016167337e-06, "loss": 0.1031, "step": 6583, "task_loss": 0.13307958841323853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989562264411872, "compression_loss": 0.0, "distillation_loss": 0.15906114876270294, "epoch": 6.25, "learning_rate": 1.407319096638754e-06, "loss": 0.16, "step": 6584, "task_loss": 0.16835139691829681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989579263977891, "compression_loss": 0.0, "distillation_loss": 0.24002781510353088, "epoch": 6.25, "learning_rate": 1.403796677384886e-06, "loss": 0.2357, "step": 6585, "task_loss": 0.19643017649650574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989596245076159, "compression_loss": 0.0, "distillation_loss": 0.023733289912343025, "epoch": 6.25, "learning_rate": 1.4002785444949928e-06, "loss": 0.0217, "step": 6586, "task_loss": 0.003847165033221245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989613207716714, "compression_loss": 0.0, "distillation_loss": 0.028031958267092705, "epoch": 6.26, "learning_rate": 1.396764698608155e-06, "loss": 0.0294, "step": 6587, "task_loss": 0.04165419936180115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989630151909591, "compression_loss": 0.0, "distillation_loss": 0.0592382550239563, "epoch": 6.26, "learning_rate": 1.393255140362687e-06, "loss": 0.0561, "step": 6588, "task_loss": 0.027590807527303696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798964707766483, "compression_loss": 0.0, "distillation_loss": 0.04887683689594269, "epoch": 6.26, "learning_rate": 1.3897498703961148e-06, "loss": 0.047, "step": 6589, "task_loss": 0.030398232862353325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989663984992464, "compression_loss": 0.0, "distillation_loss": 0.12447142601013184, "epoch": 6.26, "learning_rate": 1.3862488893451847e-06, "loss": 0.1271, "step": 6590, "task_loss": 0.15074971318244934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989680873902533, "compression_loss": 0.0, "distillation_loss": 0.04007259011268616, "epoch": 6.26, "learning_rate": 1.3827521978458713e-06, "loss": 0.0504, "step": 6591, "task_loss": 0.14287948608398438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989697744405071, "compression_loss": 0.0, "distillation_loss": 0.05581611022353172, "epoch": 6.26, "learning_rate": 1.3792597965333581e-06, "loss": 0.0642, "step": 6592, "task_loss": 0.13964907824993134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989714596510117, "compression_loss": 0.0, "distillation_loss": 0.024131378158926964, "epoch": 6.26, "learning_rate": 1.3757716860420683e-06, "loss": 0.0303, "step": 6593, "task_loss": 0.08553370833396912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989731430227707, "compression_loss": 0.0, "distillation_loss": 0.020088283345103264, "epoch": 6.26, "learning_rate": 1.3722878670056227e-06, "loss": 0.0185, "step": 6594, "task_loss": 0.003951072692871094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989748245567877, "compression_loss": 0.0, "distillation_loss": 0.031761474907398224, "epoch": 6.26, "learning_rate": 1.368808340056879e-06, "loss": 0.0333, "step": 6595, "task_loss": 0.04679109528660774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989765042540665, "compression_loss": 0.0, "distillation_loss": 0.02415706217288971, "epoch": 6.26, "learning_rate": 1.3653331058279122e-06, "loss": 0.0225, "step": 6596, "task_loss": 0.007262144237756729 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989781821156109, "compression_loss": 0.0, "distillation_loss": 0.0280197374522686, "epoch": 6.26, "learning_rate": 1.361862164950009e-06, "loss": 0.0287, "step": 6597, "task_loss": 0.03486378863453865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989798581424242, "compression_loss": 0.0, "distillation_loss": 0.015901973471045494, "epoch": 6.27, "learning_rate": 1.35839551805369e-06, "loss": 0.0156, "step": 6598, "task_loss": 0.012705270200967789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989815323355105, "compression_loss": 0.0, "distillation_loss": 0.024427423253655434, "epoch": 6.27, "learning_rate": 1.354933165768682e-06, "loss": 0.0236, "step": 6599, "task_loss": 0.016438543796539307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989832046958733, "compression_loss": 0.0, "distillation_loss": 0.05082686245441437, "epoch": 6.27, "learning_rate": 1.3514751087239402e-06, "loss": 0.0588, "step": 6600, "task_loss": 0.13012255728244781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989848752245162, "compression_loss": 0.0, "distillation_loss": 0.019267812371253967, "epoch": 6.27, "learning_rate": 1.3480213475476344e-06, "loss": 0.026, "step": 6601, "task_loss": 0.08636897802352905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798986543922443, "compression_loss": 0.0, "distillation_loss": 0.09770554304122925, "epoch": 6.27, "learning_rate": 1.3445718828671655e-06, "loss": 0.0972, "step": 6602, "task_loss": 0.09245851635932922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989882107906573, "compression_loss": 0.0, "distillation_loss": 0.03061073273420334, "epoch": 6.27, "learning_rate": 1.3411267153091378e-06, "loss": 0.0339, "step": 6603, "task_loss": 0.06348737329244614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989898758301629, "compression_loss": 0.0, "distillation_loss": 0.09166794270277023, "epoch": 6.27, "learning_rate": 1.3376858454993813e-06, "loss": 0.1133, "step": 6604, "task_loss": 0.30779021978378296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989915390419633, "compression_loss": 0.0, "distillation_loss": 0.030499190092086792, "epoch": 6.27, "learning_rate": 1.3342492740629541e-06, "loss": 0.0326, "step": 6605, "task_loss": 0.05136849731206894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989932004270623, "compression_loss": 0.0, "distillation_loss": 0.05024800822138786, "epoch": 6.27, "learning_rate": 1.330817001624124e-06, "loss": 0.0474, "step": 6606, "task_loss": 0.02216871827840805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989948599864637, "compression_loss": 0.0, "distillation_loss": 0.029151551425457, "epoch": 6.27, "learning_rate": 1.3273890288063722e-06, "loss": 0.0349, "step": 6607, "task_loss": 0.08693638443946838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.798996517721171, "compression_loss": 0.0, "distillation_loss": 0.08503906428813934, "epoch": 6.28, "learning_rate": 1.323965356232415e-06, "loss": 0.0872, "step": 6608, "task_loss": 0.10668846219778061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989981736321878, "compression_loss": 0.0, "distillation_loss": 0.03245946019887924, "epoch": 6.28, "learning_rate": 1.3205459845241714e-06, "loss": 0.0299, "step": 6609, "task_loss": 0.006885243579745293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7989998277205181, "compression_loss": 0.0, "distillation_loss": 0.0519396997988224, "epoch": 6.28, "learning_rate": 1.317130914302797e-06, "loss": 0.051, "step": 6610, "task_loss": 0.04283105209469795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990014799871654, "compression_loss": 0.0, "distillation_loss": 0.03950023651123047, "epoch": 6.28, "learning_rate": 1.3137201461886434e-06, "loss": 0.0457, "step": 6611, "task_loss": 0.10137784481048584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990031304331333, "compression_loss": 0.0, "distillation_loss": 0.03535441309213638, "epoch": 6.28, "learning_rate": 1.3103136808013061e-06, "loss": 0.0381, "step": 6612, "task_loss": 0.06290829181671143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990047790594256, "compression_loss": 0.0, "distillation_loss": 0.07202986627817154, "epoch": 6.28, "learning_rate": 1.3069115187595793e-06, "loss": 0.0809, "step": 6613, "task_loss": 0.1610376387834549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799006425867046, "compression_loss": 0.0, "distillation_loss": 0.128701314330101, "epoch": 6.28, "learning_rate": 1.3035136606814769e-06, "loss": 0.1386, "step": 6614, "task_loss": 0.22806209325790405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799008070856998, "compression_loss": 0.0, "distillation_loss": 0.05727306380867958, "epoch": 6.28, "learning_rate": 1.3001201071842466e-06, "loss": 0.0639, "step": 6615, "task_loss": 0.1234469786286354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990097140302854, "compression_loss": 0.0, "distillation_loss": 0.029445184394717216, "epoch": 6.28, "learning_rate": 1.2967308588843375e-06, "loss": 0.027, "step": 6616, "task_loss": 0.0048278942704200745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799011355387912, "compression_loss": 0.0, "distillation_loss": 0.06337439268827438, "epoch": 6.28, "learning_rate": 1.2933459163974203e-06, "loss": 0.077, "step": 6617, "task_loss": 0.19938617944717407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990129949308814, "compression_loss": 0.0, "distillation_loss": 0.03333733230829239, "epoch": 6.28, "learning_rate": 1.2899652803383926e-06, "loss": 0.0305, "step": 6618, "task_loss": 0.00532466359436512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990146326601971, "compression_loss": 0.0, "distillation_loss": 0.07982062548398972, "epoch": 6.29, "learning_rate": 1.286588951321363e-06, "loss": 0.0878, "step": 6619, "task_loss": 0.1597793698310852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990162685768631, "compression_loss": 0.0, "distillation_loss": 0.02913430519402027, "epoch": 6.29, "learning_rate": 1.2832169299596546e-06, "loss": 0.027, "step": 6620, "task_loss": 0.00752607174217701 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990179026818829, "compression_loss": 0.0, "distillation_loss": 0.05205439403653145, "epoch": 6.29, "learning_rate": 1.2798492168658083e-06, "loss": 0.0606, "step": 6621, "task_loss": 0.13726334273815155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990195349762601, "compression_loss": 0.0, "distillation_loss": 0.050472185015678406, "epoch": 6.29, "learning_rate": 1.2764858126515928e-06, "loss": 0.0563, "step": 6622, "task_loss": 0.10921429097652435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990211654609986, "compression_loss": 0.0, "distillation_loss": 0.031548745930194855, "epoch": 6.29, "learning_rate": 1.2731267179279832e-06, "loss": 0.0319, "step": 6623, "task_loss": 0.03549543768167496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799022794137102, "compression_loss": 0.0, "distillation_loss": 0.06983719766139984, "epoch": 6.29, "learning_rate": 1.2697719333051723e-06, "loss": 0.082, "step": 6624, "task_loss": 0.191563218832016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990244210055738, "compression_loss": 0.0, "distillation_loss": 0.019796200096607208, "epoch": 6.29, "learning_rate": 1.2664214593925776e-06, "loss": 0.0262, "step": 6625, "task_loss": 0.08367738872766495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990260460674179, "compression_loss": 0.0, "distillation_loss": 0.038845110684633255, "epoch": 6.29, "learning_rate": 1.263075296798824e-06, "loss": 0.052, "step": 6626, "task_loss": 0.17067357897758484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990276693236379, "compression_loss": 0.0, "distillation_loss": 0.029467133805155754, "epoch": 6.29, "learning_rate": 1.2597334461317667e-06, "loss": 0.0382, "step": 6627, "task_loss": 0.11635088920593262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990292907752375, "compression_loss": 0.0, "distillation_loss": 0.0395752415060997, "epoch": 6.29, "learning_rate": 1.2563959079984588e-06, "loss": 0.037, "step": 6628, "task_loss": 0.014232108369469643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990309104232205, "compression_loss": 0.0, "distillation_loss": 0.024423949420452118, "epoch": 6.3, "learning_rate": 1.2530626830051878e-06, "loss": 0.0302, "step": 6629, "task_loss": 0.08202096819877625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990325282685903, "compression_loss": 0.0, "distillation_loss": 0.03499576449394226, "epoch": 6.3, "learning_rate": 1.249733771757447e-06, "loss": 0.0361, "step": 6630, "task_loss": 0.045992329716682434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990341443123508, "compression_loss": 0.0, "distillation_loss": 0.11989525705575943, "epoch": 6.3, "learning_rate": 1.2464091748599443e-06, "loss": 0.1271, "step": 6631, "task_loss": 0.19175073504447937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990357585555057, "compression_loss": 0.0, "distillation_loss": 0.030376242473721504, "epoch": 6.3, "learning_rate": 1.243088892916619e-06, "loss": 0.0319, "step": 6632, "task_loss": 0.04513759911060333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990373709990585, "compression_loss": 0.0, "distillation_loss": 0.10441594570875168, "epoch": 6.3, "learning_rate": 1.239772926530608e-06, "loss": 0.1108, "step": 6633, "task_loss": 0.16776102781295776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799038981644013, "compression_loss": 0.0, "distillation_loss": 0.04732033982872963, "epoch": 6.3, "learning_rate": 1.2364612763042793e-06, "loss": 0.0544, "step": 6634, "task_loss": 0.11861524730920792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990405904913729, "compression_loss": 0.0, "distillation_loss": 0.022771336138248444, "epoch": 6.3, "learning_rate": 1.2331539428391963e-06, "loss": 0.021, "step": 6635, "task_loss": 0.005372023209929466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990421975421418, "compression_loss": 0.0, "distillation_loss": 0.06504938006401062, "epoch": 6.3, "learning_rate": 1.2298509267361702e-06, "loss": 0.0743, "step": 6636, "task_loss": 0.1580170840024948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990438027973236, "compression_loss": 0.0, "distillation_loss": 0.04712830111384392, "epoch": 6.3, "learning_rate": 1.2265522285952013e-06, "loss": 0.051, "step": 6637, "task_loss": 0.08559815585613251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990454062579216, "compression_loss": 0.0, "distillation_loss": 0.024217963218688965, "epoch": 6.3, "learning_rate": 1.2232578490155105e-06, "loss": 0.0226, "step": 6638, "task_loss": 0.0075660087168216705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990470079249398, "compression_loss": 0.0, "distillation_loss": 0.021058350801467896, "epoch": 6.3, "learning_rate": 1.219967788595544e-06, "loss": 0.0246, "step": 6639, "task_loss": 0.05632723867893219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990486077993818, "compression_loss": 0.0, "distillation_loss": 0.056007929146289825, "epoch": 6.31, "learning_rate": 1.2166820479329572e-06, "loss": 0.0745, "step": 6640, "task_loss": 0.24108853936195374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990502058822513, "compression_loss": 0.0, "distillation_loss": 0.02392977848649025, "epoch": 6.31, "learning_rate": 1.2134006276246169e-06, "loss": 0.0225, "step": 6641, "task_loss": 0.00946834497153759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990518021745519, "compression_loss": 0.0, "distillation_loss": 0.027623049914836884, "epoch": 6.31, "learning_rate": 1.2101235282666045e-06, "loss": 0.026, "step": 6642, "task_loss": 0.011274173855781555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990533966772873, "compression_loss": 0.0, "distillation_loss": 0.04122261703014374, "epoch": 6.31, "learning_rate": 1.2068507504542332e-06, "loss": 0.0483, "step": 6643, "task_loss": 0.11227674782276154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990549893914611, "compression_loss": 0.0, "distillation_loss": 0.014780269004404545, "epoch": 6.31, "learning_rate": 1.2035822947820074e-06, "loss": 0.0215, "step": 6644, "task_loss": 0.08238159120082855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990565803180774, "compression_loss": 0.0, "distillation_loss": 0.10595270246267319, "epoch": 6.31, "learning_rate": 1.200318161843661e-06, "loss": 0.1129, "step": 6645, "task_loss": 0.17518508434295654 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990581694581393, "compression_loss": 0.0, "distillation_loss": 0.04436454176902771, "epoch": 6.31, "learning_rate": 1.1970583522321472e-06, "loss": 0.0475, "step": 6646, "task_loss": 0.07612089067697525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990597568126508, "compression_loss": 0.0, "distillation_loss": 0.028508765622973442, "epoch": 6.31, "learning_rate": 1.1938028665396173e-06, "loss": 0.0261, "step": 6647, "task_loss": 0.003986643627285957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990613423826156, "compression_loss": 0.0, "distillation_loss": 0.12394885718822479, "epoch": 6.31, "learning_rate": 1.190551705357451e-06, "loss": 0.1199, "step": 6648, "task_loss": 0.08359791338443756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990629261690374, "compression_loss": 0.0, "distillation_loss": 0.0263461172580719, "epoch": 6.31, "learning_rate": 1.187304869276229e-06, "loss": 0.0244, "step": 6649, "task_loss": 0.006703455001115799 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990645081729196, "compression_loss": 0.0, "distillation_loss": 0.04055074229836464, "epoch": 6.32, "learning_rate": 1.184062358885768e-06, "loss": 0.0377, "step": 6650, "task_loss": 0.012343864887952805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990660883952663, "compression_loss": 0.0, "distillation_loss": 0.03743916004896164, "epoch": 6.32, "learning_rate": 1.1808241747750748e-06, "loss": 0.0488, "step": 6651, "task_loss": 0.1512567400932312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990676668370809, "compression_loss": 0.0, "distillation_loss": 0.05502176284790039, "epoch": 6.32, "learning_rate": 1.1775903175323787e-06, "loss": 0.0604, "step": 6652, "task_loss": 0.10876131802797318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799069243499367, "compression_loss": 0.0, "distillation_loss": 0.026486551389098167, "epoch": 6.32, "learning_rate": 1.174360787745138e-06, "loss": 0.0318, "step": 6653, "task_loss": 0.07932649552822113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990708183831287, "compression_loss": 0.0, "distillation_loss": 0.09362354874610901, "epoch": 6.32, "learning_rate": 1.1711355860000079e-06, "loss": 0.091, "step": 6654, "task_loss": 0.06723588705062866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990723914893691, "compression_loss": 0.0, "distillation_loss": 0.13072776794433594, "epoch": 6.32, "learning_rate": 1.167914712882856e-06, "loss": 0.134, "step": 6655, "task_loss": 0.1634681522846222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990739628190924, "compression_loss": 0.0, "distillation_loss": 0.014383634552359581, "epoch": 6.32, "learning_rate": 1.1646981689787728e-06, "loss": 0.0228, "step": 6656, "task_loss": 0.09892392158508301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990755323733021, "compression_loss": 0.0, "distillation_loss": 0.013216648250818253, "epoch": 6.32, "learning_rate": 1.1614859548720603e-06, "loss": 0.0251, "step": 6657, "task_loss": 0.13216069340705872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990771001530017, "compression_loss": 0.0, "distillation_loss": 0.06353971362113953, "epoch": 6.32, "learning_rate": 1.1582780711462321e-06, "loss": 0.0629, "step": 6658, "task_loss": 0.057146959006786346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990786661591952, "compression_loss": 0.0, "distillation_loss": 0.06731215119361877, "epoch": 6.32, "learning_rate": 1.1550745183840139e-06, "loss": 0.0659, "step": 6659, "task_loss": 0.05347077175974846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799080230392886, "compression_loss": 0.0, "distillation_loss": 0.018060829490423203, "epoch": 6.32, "learning_rate": 1.1518752971673485e-06, "loss": 0.0169, "step": 6660, "task_loss": 0.006073372438549995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799081792855078, "compression_loss": 0.0, "distillation_loss": 0.05580337345600128, "epoch": 6.33, "learning_rate": 1.1486804080773877e-06, "loss": 0.0684, "step": 6661, "task_loss": 0.18163037300109863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990833535467747, "compression_loss": 0.0, "distillation_loss": 0.07261084020137787, "epoch": 6.33, "learning_rate": 1.1454898516945035e-06, "loss": 0.0715, "step": 6662, "task_loss": 0.061981115490198135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79908491246898, "compression_loss": 0.0, "distillation_loss": 0.031433992087841034, "epoch": 6.33, "learning_rate": 1.142303628598268e-06, "loss": 0.0484, "step": 6663, "task_loss": 0.20125338435173035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990864696226974, "compression_loss": 0.0, "distillation_loss": 0.057327330112457275, "epoch": 6.33, "learning_rate": 1.1391217393674825e-06, "loss": 0.0524, "step": 6664, "task_loss": 0.008000411093235016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990880250089306, "compression_loss": 0.0, "distillation_loss": 0.02260660007596016, "epoch": 6.33, "learning_rate": 1.1359441845801483e-06, "loss": 0.0213, "step": 6665, "task_loss": 0.00976894237101078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990895786286834, "compression_loss": 0.0, "distillation_loss": 0.019381048157811165, "epoch": 6.33, "learning_rate": 1.1327709648134787e-06, "loss": 0.0255, "step": 6666, "task_loss": 0.08072338998317719 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990911304829592, "compression_loss": 0.0, "distillation_loss": 0.042968470603227615, "epoch": 6.33, "learning_rate": 1.1296020806439128e-06, "loss": 0.0399, "step": 6667, "task_loss": 0.012241264805197716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990926805727621, "compression_loss": 0.0, "distillation_loss": 0.05627443641424179, "epoch": 6.33, "learning_rate": 1.1264375326470926e-06, "loss": 0.0537, "step": 6668, "task_loss": 0.030838757753372192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990942288990954, "compression_loss": 0.0, "distillation_loss": 0.043274518102407455, "epoch": 6.33, "learning_rate": 1.1232773213978642e-06, "loss": 0.0483, "step": 6669, "task_loss": 0.09389711171388626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799095775462963, "compression_loss": 0.0, "distillation_loss": 0.027110418304800987, "epoch": 6.33, "learning_rate": 1.1201214474703043e-06, "loss": 0.0259, "step": 6670, "task_loss": 0.014662139117717743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990973202653685, "compression_loss": 0.0, "distillation_loss": 0.022032059729099274, "epoch": 6.34, "learning_rate": 1.1169699114376931e-06, "loss": 0.031, "step": 6671, "task_loss": 0.11220581084489822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7990988633073157, "compression_loss": 0.0, "distillation_loss": 0.014706656336784363, "epoch": 6.34, "learning_rate": 1.1138227138725171e-06, "loss": 0.0135, "step": 6672, "task_loss": 0.0031169112771749496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991004045898081, "compression_loss": 0.0, "distillation_loss": 0.030809402465820312, "epoch": 6.34, "learning_rate": 1.1106798553464804e-06, "loss": 0.0434, "step": 6673, "task_loss": 0.15683190524578094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991019441138495, "compression_loss": 0.0, "distillation_loss": 0.12929940223693848, "epoch": 6.34, "learning_rate": 1.1075413364305037e-06, "loss": 0.1301, "step": 6674, "task_loss": 0.1377396285533905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991034818804436, "compression_loss": 0.0, "distillation_loss": 0.017488310113549232, "epoch": 6.34, "learning_rate": 1.1044071576947118e-06, "loss": 0.016, "step": 6675, "task_loss": 0.0023362338542938232 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799105017890594, "compression_loss": 0.0, "distillation_loss": 0.024973999708890915, "epoch": 6.34, "learning_rate": 1.101277319708438e-06, "loss": 0.0233, "step": 6676, "task_loss": 0.007980858907103539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991065521453045, "compression_loss": 0.0, "distillation_loss": 0.0375574491918087, "epoch": 6.34, "learning_rate": 1.0981518230402387e-06, "loss": 0.0393, "step": 6677, "task_loss": 0.05460818111896515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991080846455786, "compression_loss": 0.0, "distillation_loss": 0.04282548278570175, "epoch": 6.34, "learning_rate": 1.0950306682578709e-06, "loss": 0.041, "step": 6678, "task_loss": 0.024096237495541573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991096153924201, "compression_loss": 0.0, "distillation_loss": 0.08127694576978683, "epoch": 6.34, "learning_rate": 1.0919138559283143e-06, "loss": 0.0851, "step": 6679, "task_loss": 0.11989080905914307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991111443868326, "compression_loss": 0.0, "distillation_loss": 0.025165293365716934, "epoch": 6.34, "learning_rate": 1.0888013866177437e-06, "loss": 0.037, "step": 6680, "task_loss": 0.1433994174003601 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991126716298199, "compression_loss": 0.0, "distillation_loss": 0.027159132063388824, "epoch": 6.34, "learning_rate": 1.0856932608915627e-06, "loss": 0.0298, "step": 6681, "task_loss": 0.05401374772191048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991141971223857, "compression_loss": 0.0, "distillation_loss": 0.13656893372535706, "epoch": 6.35, "learning_rate": 1.0825894793143721e-06, "loss": 0.1396, "step": 6682, "task_loss": 0.16644863784313202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991157208655336, "compression_loss": 0.0, "distillation_loss": 0.05064243823289871, "epoch": 6.35, "learning_rate": 1.0794900424499876e-06, "loss": 0.0506, "step": 6683, "task_loss": 0.05057719722390175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991172428602672, "compression_loss": 0.0, "distillation_loss": 0.07734186947345734, "epoch": 6.35, "learning_rate": 1.0763949508614423e-06, "loss": 0.0739, "step": 6684, "task_loss": 0.04286087676882744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991187631075903, "compression_loss": 0.0, "distillation_loss": 0.08190207928419113, "epoch": 6.35, "learning_rate": 1.0733042051109726e-06, "loss": 0.0776, "step": 6685, "task_loss": 0.038602665066719055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991202816085067, "compression_loss": 0.0, "distillation_loss": 0.04311536252498627, "epoch": 6.35, "learning_rate": 1.070217805760021e-06, "loss": 0.0555, "step": 6686, "task_loss": 0.16687297821044922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991217983640198, "compression_loss": 0.0, "distillation_loss": 0.03107578307390213, "epoch": 6.35, "learning_rate": 1.0671357533692554e-06, "loss": 0.0346, "step": 6687, "task_loss": 0.06593281030654907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991233133751334, "compression_loss": 0.0, "distillation_loss": 0.023783832788467407, "epoch": 6.35, "learning_rate": 1.0640580484985424e-06, "loss": 0.027, "step": 6688, "task_loss": 0.05620795860886574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991248266428512, "compression_loss": 0.0, "distillation_loss": 0.09371946007013321, "epoch": 6.35, "learning_rate": 1.0609846917069622e-06, "loss": 0.088, "step": 6689, "task_loss": 0.03661830723285675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799126338168177, "compression_loss": 0.0, "distillation_loss": 0.22859099507331848, "epoch": 6.35, "learning_rate": 1.0579156835528015e-06, "loss": 0.2157, "step": 6690, "task_loss": 0.09983595460653305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991278479521143, "compression_loss": 0.0, "distillation_loss": 0.0681435763835907, "epoch": 6.35, "learning_rate": 1.0548510245935673e-06, "loss": 0.0716, "step": 6691, "task_loss": 0.10267331451177597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991293559956669, "compression_loss": 0.0, "distillation_loss": 0.029233068227767944, "epoch": 6.36, "learning_rate": 1.051790715385964e-06, "loss": 0.0305, "step": 6692, "task_loss": 0.041752371937036514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991308622998384, "compression_loss": 0.0, "distillation_loss": 0.02068510465323925, "epoch": 6.36, "learning_rate": 1.0487347564859113e-06, "loss": 0.0214, "step": 6693, "task_loss": 0.02766001597046852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991323668656325, "compression_loss": 0.0, "distillation_loss": 0.009869755245745182, "epoch": 6.36, "learning_rate": 1.0456831484485423e-06, "loss": 0.0093, "step": 6694, "task_loss": 0.003924252465367317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991338696940529, "compression_loss": 0.0, "distillation_loss": 0.06949397921562195, "epoch": 6.36, "learning_rate": 1.0426358918281948e-06, "loss": 0.0649, "step": 6695, "task_loss": 0.023511435836553574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991353707861033, "compression_loss": 0.0, "distillation_loss": 0.03212964907288551, "epoch": 6.36, "learning_rate": 1.0395929871784144e-06, "loss": 0.0365, "step": 6696, "task_loss": 0.07626573741436005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991368701427873, "compression_loss": 0.0, "distillation_loss": 0.08683188259601593, "epoch": 6.36, "learning_rate": 1.0365544350519646e-06, "loss": 0.0829, "step": 6697, "task_loss": 0.047511257231235504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991383677651087, "compression_loss": 0.0, "distillation_loss": 0.031793013215065, "epoch": 6.36, "learning_rate": 1.0335202360008124e-06, "loss": 0.0338, "step": 6698, "task_loss": 0.051480717957019806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991398636540712, "compression_loss": 0.0, "distillation_loss": 0.024651937186717987, "epoch": 6.36, "learning_rate": 1.0304903905761332e-06, "loss": 0.0227, "step": 6699, "task_loss": 0.0050739627331495285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991413578106783, "compression_loss": 0.0, "distillation_loss": 0.03288833424448967, "epoch": 6.36, "learning_rate": 1.0274648993283093e-06, "loss": 0.0333, "step": 6700, "task_loss": 0.03704278543591499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991428502359338, "compression_loss": 0.0, "distillation_loss": 0.07430990785360336, "epoch": 6.36, "learning_rate": 1.0244437628069425e-06, "loss": 0.0758, "step": 6701, "task_loss": 0.08946188539266586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991443409308414, "compression_loss": 0.0, "distillation_loss": 0.031692154705524445, "epoch": 6.36, "learning_rate": 1.0214269815608358e-06, "loss": 0.0424, "step": 6702, "task_loss": 0.1384189873933792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991458298964047, "compression_loss": 0.0, "distillation_loss": 0.03650377690792084, "epoch": 6.37, "learning_rate": 1.018414556137995e-06, "loss": 0.0546, "step": 6703, "task_loss": 0.21728603541851044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991473171336275, "compression_loss": 0.0, "distillation_loss": 0.14218676090240479, "epoch": 6.37, "learning_rate": 1.015406487085646e-06, "loss": 0.1351, "step": 6704, "task_loss": 0.07109387218952179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991488026435134, "compression_loss": 0.0, "distillation_loss": 0.08769318461418152, "epoch": 6.37, "learning_rate": 1.0124027749502246e-06, "loss": 0.092, "step": 6705, "task_loss": 0.13085408508777618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991502864270661, "compression_loss": 0.0, "distillation_loss": 0.0496244877576828, "epoch": 6.37, "learning_rate": 1.0094034202773634e-06, "loss": 0.0473, "step": 6706, "task_loss": 0.026335952803492546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991517684852892, "compression_loss": 0.0, "distillation_loss": 0.058904923498630524, "epoch": 6.37, "learning_rate": 1.006408423611907e-06, "loss": 0.0619, "step": 6707, "task_loss": 0.08892402052879333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991532488191866, "compression_loss": 0.0, "distillation_loss": 0.03160509839653969, "epoch": 6.37, "learning_rate": 1.0034177854979205e-06, "loss": 0.0358, "step": 6708, "task_loss": 0.07310265302658081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991547274297618, "compression_loss": 0.0, "distillation_loss": 0.02190888673067093, "epoch": 6.37, "learning_rate": 1.0004315064786608e-06, "loss": 0.0332, "step": 6709, "task_loss": 0.134353369474411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991562043180185, "compression_loss": 0.0, "distillation_loss": 0.02776992879807949, "epoch": 6.37, "learning_rate": 9.974495870965967e-07, "loss": 0.0338, "step": 6710, "task_loss": 0.08767490833997726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991576794849604, "compression_loss": 0.0, "distillation_loss": 0.03906933218240738, "epoch": 6.37, "learning_rate": 9.944720278934171e-07, "loss": 0.0357, "step": 6711, "task_loss": 0.005863867700099945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991591529315912, "compression_loss": 0.0, "distillation_loss": 0.05178176239132881, "epoch": 6.37, "learning_rate": 9.914988294100063e-07, "loss": 0.0492, "step": 6712, "task_loss": 0.026018494740128517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991606246589146, "compression_loss": 0.0, "distillation_loss": 0.018913403153419495, "epoch": 6.38, "learning_rate": 9.885299921864543e-07, "loss": 0.0217, "step": 6713, "task_loss": 0.04700911417603493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991620946679343, "compression_loss": 0.0, "distillation_loss": 0.045953452587127686, "epoch": 6.38, "learning_rate": 9.855655167620715e-07, "loss": 0.0522, "step": 6714, "task_loss": 0.10820292681455612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991635629596539, "compression_loss": 0.0, "distillation_loss": 0.09742830693721771, "epoch": 6.38, "learning_rate": 9.826054036753713e-07, "loss": 0.092, "step": 6715, "task_loss": 0.04326290637254715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991650295350771, "compression_loss": 0.0, "distillation_loss": 0.03743236884474754, "epoch": 6.38, "learning_rate": 9.796496534640653e-07, "loss": 0.0345, "step": 6716, "task_loss": 0.008360574021935463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991664943952076, "compression_loss": 0.0, "distillation_loss": 0.060953132808208466, "epoch": 6.38, "learning_rate": 9.766982666650826e-07, "loss": 0.0634, "step": 6717, "task_loss": 0.08522552996873856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991679575410491, "compression_loss": 0.0, "distillation_loss": 0.047717705368995667, "epoch": 6.38, "learning_rate": 9.737512438145579e-07, "loss": 0.053, "step": 6718, "task_loss": 0.10008193552494049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991694189736052, "compression_loss": 0.0, "distillation_loss": 0.024083293974399567, "epoch": 6.38, "learning_rate": 9.708085854478327e-07, "loss": 0.022, "step": 6719, "task_loss": 0.0036613382399082184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991708786938797, "compression_loss": 0.0, "distillation_loss": 0.05113540589809418, "epoch": 6.38, "learning_rate": 9.678702920994543e-07, "loss": 0.0567, "step": 6720, "task_loss": 0.1063266173005104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991723367028762, "compression_loss": 0.0, "distillation_loss": 0.028987891972064972, "epoch": 6.38, "learning_rate": 9.649363643031733e-07, "loss": 0.0268, "step": 6721, "task_loss": 0.007400134578347206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991737930015985, "compression_loss": 0.0, "distillation_loss": 0.04050924628973007, "epoch": 6.38, "learning_rate": 9.620068025919583e-07, "loss": 0.0462, "step": 6722, "task_loss": 0.097807377576828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991752475910502, "compression_loss": 0.0, "distillation_loss": 0.0265050008893013, "epoch": 6.38, "learning_rate": 9.590816074979774e-07, "loss": 0.0258, "step": 6723, "task_loss": 0.019627349451184273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991767004722349, "compression_loss": 0.0, "distillation_loss": 0.10371723771095276, "epoch": 6.39, "learning_rate": 9.561607795526007e-07, "loss": 0.1172, "step": 6724, "task_loss": 0.2384086400270462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991781516461564, "compression_loss": 0.0, "distillation_loss": 0.033512018620967865, "epoch": 6.39, "learning_rate": 9.532443192864199e-07, "loss": 0.0316, "step": 6725, "task_loss": 0.013961471617221832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991796011138184, "compression_loss": 0.0, "distillation_loss": 0.03146098554134369, "epoch": 6.39, "learning_rate": 9.50332227229217e-07, "loss": 0.0535, "step": 6726, "task_loss": 0.2520584762096405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991810488762244, "compression_loss": 0.0, "distillation_loss": 0.026955943554639816, "epoch": 6.39, "learning_rate": 9.474245039099882e-07, "loss": 0.0253, "step": 6727, "task_loss": 0.010826632380485535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991824949343782, "compression_loss": 0.0, "distillation_loss": 0.024500368162989616, "epoch": 6.39, "learning_rate": 9.445211498569362e-07, "loss": 0.0334, "step": 6728, "task_loss": 0.11393891274929047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991839392892836, "compression_loss": 0.0, "distillation_loss": 0.05089277774095535, "epoch": 6.39, "learning_rate": 9.416221655974722e-07, "loss": 0.0473, "step": 6729, "task_loss": 0.014700580388307571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991853819419441, "compression_loss": 0.0, "distillation_loss": 0.01643327623605728, "epoch": 6.39, "learning_rate": 9.387275516582056e-07, "loss": 0.0304, "step": 6730, "task_loss": 0.1562596559524536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991868228933635, "compression_loss": 0.0, "distillation_loss": 0.07891779392957687, "epoch": 6.39, "learning_rate": 9.358373085649602e-07, "loss": 0.0943, "step": 6731, "task_loss": 0.23281759023666382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991882621445454, "compression_loss": 0.0, "distillation_loss": 0.020287442952394485, "epoch": 6.39, "learning_rate": 9.329514368427633e-07, "loss": 0.0234, "step": 6732, "task_loss": 0.051706451922655106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991896996964936, "compression_loss": 0.0, "distillation_loss": 0.0532609187066555, "epoch": 6.39, "learning_rate": 9.300699370158456e-07, "loss": 0.0599, "step": 6733, "task_loss": 0.11954209208488464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991911355502115, "compression_loss": 0.0, "distillation_loss": 0.037625864148139954, "epoch": 6.4, "learning_rate": 9.271928096076493e-07, "loss": 0.0411, "step": 6734, "task_loss": 0.07242215424776077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991925697067032, "compression_loss": 0.0, "distillation_loss": 0.02129533141851425, "epoch": 6.4, "learning_rate": 9.243200551408094e-07, "loss": 0.0389, "step": 6735, "task_loss": 0.19778040051460266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799194002166972, "compression_loss": 0.0, "distillation_loss": 0.030612638220191002, "epoch": 6.4, "learning_rate": 9.214516741371831e-07, "loss": 0.0464, "step": 6736, "task_loss": 0.18832482397556305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991954329320218, "compression_loss": 0.0, "distillation_loss": 0.01869923248887062, "epoch": 6.4, "learning_rate": 9.185876671178262e-07, "loss": 0.0174, "step": 6737, "task_loss": 0.005990633741021156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991968620028562, "compression_loss": 0.0, "distillation_loss": 0.02486245334148407, "epoch": 6.4, "learning_rate": 9.157280346029918e-07, "loss": 0.0232, "step": 6738, "task_loss": 0.00861707329750061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799198289380479, "compression_loss": 0.0, "distillation_loss": 0.03501253202557564, "epoch": 6.4, "learning_rate": 9.128727771121531e-07, "loss": 0.0323, "step": 6739, "task_loss": 0.007670162245631218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7991997150658937, "compression_loss": 0.0, "distillation_loss": 0.041302017867565155, "epoch": 6.4, "learning_rate": 9.100218951639816e-07, "loss": 0.0531, "step": 6740, "task_loss": 0.15916121006011963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992011390601041, "compression_loss": 0.0, "distillation_loss": 0.056890472769737244, "epoch": 6.4, "learning_rate": 9.071753892763519e-07, "loss": 0.0589, "step": 6741, "task_loss": 0.0772441178560257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992025613641139, "compression_loss": 0.0, "distillation_loss": 0.02093365229666233, "epoch": 6.4, "learning_rate": 9.043332599663418e-07, "loss": 0.0277, "step": 6742, "task_loss": 0.08839461952447891 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992039819789267, "compression_loss": 0.0, "distillation_loss": 0.09711463004350662, "epoch": 6.4, "learning_rate": 9.014955077502413e-07, "loss": 0.1142, "step": 6743, "task_loss": 0.268157958984375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992054009055463, "compression_loss": 0.0, "distillation_loss": 0.0171291995793581, "epoch": 6.4, "learning_rate": 8.986621331435435e-07, "loss": 0.0159, "step": 6744, "task_loss": 0.004604209214448929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992068181449762, "compression_loss": 0.0, "distillation_loss": 0.09282074868679047, "epoch": 6.41, "learning_rate": 8.958331366609423e-07, "loss": 0.0897, "step": 6745, "task_loss": 0.061169348657131195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992082336982201, "compression_loss": 0.0, "distillation_loss": 0.07672721892595291, "epoch": 6.41, "learning_rate": 8.930085188163378e-07, "loss": 0.08, "step": 6746, "task_loss": 0.1096218079328537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799209647566282, "compression_loss": 0.0, "distillation_loss": 0.02508268877863884, "epoch": 6.41, "learning_rate": 8.90188280122839e-07, "loss": 0.0238, "step": 6747, "task_loss": 0.012722663581371307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992110597501652, "compression_loss": 0.0, "distillation_loss": 0.024517200887203217, "epoch": 6.41, "learning_rate": 8.87372421092747e-07, "loss": 0.0225, "step": 6748, "task_loss": 0.00447007454931736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992124702508735, "compression_loss": 0.0, "distillation_loss": 0.01891644299030304, "epoch": 6.41, "learning_rate": 8.845609422375861e-07, "loss": 0.0175, "step": 6749, "task_loss": 0.00444909930229187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992138790694107, "compression_loss": 0.0, "distillation_loss": 0.02350194752216339, "epoch": 6.41, "learning_rate": 8.817538440680728e-07, "loss": 0.0224, "step": 6750, "task_loss": 0.01221693865954876 }, { "epoch": 6.41, "eval_accuracy": 0.8910550458715596, "eval_loss": 0.43947499990463257, "eval_runtime": 18.2778, "eval_samples_per_second": 47.708, "eval_steps_per_second": 5.964, "step": 6750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992152862067803, "compression_loss": 0.0, "distillation_loss": 0.03163065388798714, "epoch": 6.41, "learning_rate": 8.78951127094127e-07, "loss": 0.0336, "step": 6751, "task_loss": 0.05086345970630646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992166916639861, "compression_loss": 0.0, "distillation_loss": 0.10874065011739731, "epoch": 6.41, "learning_rate": 8.761527918248775e-07, "loss": 0.1034, "step": 6752, "task_loss": 0.05520922690629959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992180954420318, "compression_loss": 0.0, "distillation_loss": 0.02465911954641342, "epoch": 6.41, "learning_rate": 8.733588387686537e-07, "loss": 0.0227, "step": 6753, "task_loss": 0.005142947658896446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992194975419211, "compression_loss": 0.0, "distillation_loss": 0.07918893545866013, "epoch": 6.41, "learning_rate": 8.705692684329969e-07, "loss": 0.0822, "step": 6754, "task_loss": 0.10922683775424957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992208979646576, "compression_loss": 0.0, "distillation_loss": 0.030382784083485603, "epoch": 6.42, "learning_rate": 8.677840813246352e-07, "loss": 0.0294, "step": 6755, "task_loss": 0.020388955250382423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992222967112449, "compression_loss": 0.0, "distillation_loss": 0.035736992955207825, "epoch": 6.42, "learning_rate": 8.650032779495165e-07, "loss": 0.0331, "step": 6756, "task_loss": 0.009340623393654823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992236937826869, "compression_loss": 0.0, "distillation_loss": 0.045183077454566956, "epoch": 6.42, "learning_rate": 8.622268588127924e-07, "loss": 0.0426, "step": 6757, "task_loss": 0.019841017201542854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799225089179987, "compression_loss": 0.0, "distillation_loss": 0.0189533494412899, "epoch": 6.42, "learning_rate": 8.594548244188067e-07, "loss": 0.0177, "step": 6758, "task_loss": 0.006474364548921585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992264829041492, "compression_loss": 0.0, "distillation_loss": 0.024439578875899315, "epoch": 6.42, "learning_rate": 8.56687175271112e-07, "loss": 0.0229, "step": 6759, "task_loss": 0.008742136880755424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992278749561771, "compression_loss": 0.0, "distillation_loss": 0.026206394657492638, "epoch": 6.42, "learning_rate": 8.539239118724701e-07, "loss": 0.0266, "step": 6760, "task_loss": 0.03009379468858242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992292653370742, "compression_loss": 0.0, "distillation_loss": 0.016620401293039322, "epoch": 6.42, "learning_rate": 8.511650347248406e-07, "loss": 0.017, "step": 6761, "task_loss": 0.02082774229347706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992306540478442, "compression_loss": 0.0, "distillation_loss": 0.025677714496850967, "epoch": 6.42, "learning_rate": 8.484105443293783e-07, "loss": 0.0237, "step": 6762, "task_loss": 0.005807174369692802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992320410894911, "compression_loss": 0.0, "distillation_loss": 0.024803977459669113, "epoch": 6.42, "learning_rate": 8.456604411864605e-07, "loss": 0.0229, "step": 6763, "task_loss": 0.005409408360719681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992334264630182, "compression_loss": 0.0, "distillation_loss": 0.09612073004245758, "epoch": 6.42, "learning_rate": 8.429147257956516e-07, "loss": 0.0983, "step": 6764, "task_loss": 0.11760664731264114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992348101694294, "compression_loss": 0.0, "distillation_loss": 0.05043857544660568, "epoch": 6.42, "learning_rate": 8.401733986557247e-07, "loss": 0.0493, "step": 6765, "task_loss": 0.03950589895248413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992361922097283, "compression_loss": 0.0, "distillation_loss": 0.05412200465798378, "epoch": 6.43, "learning_rate": 8.374364602646511e-07, "loss": 0.0535, "step": 6766, "task_loss": 0.04812926799058914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992375725849187, "compression_loss": 0.0, "distillation_loss": 0.02848942205309868, "epoch": 6.43, "learning_rate": 8.347039111196164e-07, "loss": 0.0328, "step": 6767, "task_loss": 0.07136546820402145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992389512960041, "compression_loss": 0.0, "distillation_loss": 0.11072133481502533, "epoch": 6.43, "learning_rate": 8.319757517169985e-07, "loss": 0.12, "step": 6768, "task_loss": 0.2039954513311386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992403283439883, "compression_loss": 0.0, "distillation_loss": 0.02107694186270237, "epoch": 6.43, "learning_rate": 8.29251982552376e-07, "loss": 0.0194, "step": 6769, "task_loss": 0.004783786833286285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992417037298749, "compression_loss": 0.0, "distillation_loss": 0.013939835131168365, "epoch": 6.43, "learning_rate": 8.265326041205417e-07, "loss": 0.0129, "step": 6770, "task_loss": 0.0032312143594026566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992430774546677, "compression_loss": 0.0, "distillation_loss": 0.04151005297899246, "epoch": 6.43, "learning_rate": 8.238176169154816e-07, "loss": 0.0465, "step": 6771, "task_loss": 0.09118230640888214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992444495193702, "compression_loss": 0.0, "distillation_loss": 0.06526434421539307, "epoch": 6.43, "learning_rate": 8.211070214303812e-07, "loss": 0.0745, "step": 6772, "task_loss": 0.15805087983608246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992458199249863, "compression_loss": 0.0, "distillation_loss": 0.024169165641069412, "epoch": 6.43, "learning_rate": 8.184008181576386e-07, "loss": 0.0227, "step": 6773, "task_loss": 0.009067356586456299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992471886725196, "compression_loss": 0.0, "distillation_loss": 0.029119737446308136, "epoch": 6.43, "learning_rate": 8.156990075888466e-07, "loss": 0.0281, "step": 6774, "task_loss": 0.0189868975430727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992485557629738, "compression_loss": 0.0, "distillation_loss": 0.03186260536313057, "epoch": 6.43, "learning_rate": 8.130015902148042e-07, "loss": 0.0347, "step": 6775, "task_loss": 0.06021621823310852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992499211973525, "compression_loss": 0.0, "distillation_loss": 0.02992023155093193, "epoch": 6.43, "learning_rate": 8.103085665255084e-07, "loss": 0.0543, "step": 6776, "task_loss": 0.27328985929489136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992512849766594, "compression_loss": 0.0, "distillation_loss": 0.04072340205311775, "epoch": 6.44, "learning_rate": 8.076199370101594e-07, "loss": 0.0409, "step": 6777, "task_loss": 0.042335450649261475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992526471018983, "compression_loss": 0.0, "distillation_loss": 0.04745135456323624, "epoch": 6.44, "learning_rate": 8.04935702157164e-07, "loss": 0.0502, "step": 6778, "task_loss": 0.07529226690530777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992540075740727, "compression_loss": 0.0, "distillation_loss": 0.04864822328090668, "epoch": 6.44, "learning_rate": 8.022558624541182e-07, "loss": 0.0524, "step": 6779, "task_loss": 0.08609342575073242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992553663941864, "compression_loss": 0.0, "distillation_loss": 0.11904314160346985, "epoch": 6.44, "learning_rate": 7.995804183878353e-07, "loss": 0.1204, "step": 6780, "task_loss": 0.13219860196113586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992567235632431, "compression_loss": 0.0, "distillation_loss": 0.0507618710398674, "epoch": 6.44, "learning_rate": 7.969093704443209e-07, "loss": 0.0556, "step": 6781, "task_loss": 0.09945084899663925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992580790822463, "compression_loss": 0.0, "distillation_loss": 0.08536731451749802, "epoch": 6.44, "learning_rate": 7.942427191087786e-07, "loss": 0.0833, "step": 6782, "task_loss": 0.06510312855243683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992594329521999, "compression_loss": 0.0, "distillation_loss": 0.09828724712133408, "epoch": 6.44, "learning_rate": 7.915804648656239e-07, "loss": 0.1016, "step": 6783, "task_loss": 0.13165351748466492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992607851741076, "compression_loss": 0.0, "distillation_loss": 0.06299099326133728, "epoch": 6.44, "learning_rate": 7.889226081984696e-07, "loss": 0.0704, "step": 6784, "task_loss": 0.13664023578166962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992621357489729, "compression_loss": 0.0, "distillation_loss": 0.017018694430589676, "epoch": 6.44, "learning_rate": 7.862691495901243e-07, "loss": 0.0163, "step": 6785, "task_loss": 0.00940592773258686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992634846777995, "compression_loss": 0.0, "distillation_loss": 0.08001522719860077, "epoch": 6.44, "learning_rate": 7.83620089522602e-07, "loss": 0.0758, "step": 6786, "task_loss": 0.038091909140348434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992648319615913, "compression_loss": 0.0, "distillation_loss": 0.013534367084503174, "epoch": 6.45, "learning_rate": 7.809754284771181e-07, "loss": 0.0144, "step": 6787, "task_loss": 0.0219335425645113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992661776013518, "compression_loss": 0.0, "distillation_loss": 0.03589925542473793, "epoch": 6.45, "learning_rate": 7.783351669340882e-07, "loss": 0.0332, "step": 6788, "task_loss": 0.009313993155956268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992675215980846, "compression_loss": 0.0, "distillation_loss": 0.044496770948171616, "epoch": 6.45, "learning_rate": 7.756993053731259e-07, "loss": 0.0418, "step": 6789, "task_loss": 0.01771704852581024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992688639527935, "compression_loss": 0.0, "distillation_loss": 0.025459101423621178, "epoch": 6.45, "learning_rate": 7.730678442730538e-07, "loss": 0.0239, "step": 6790, "task_loss": 0.009527930989861488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992702046664822, "compression_loss": 0.0, "distillation_loss": 0.02458195760846138, "epoch": 6.45, "learning_rate": 7.704407841118811e-07, "loss": 0.0229, "step": 6791, "task_loss": 0.007813390344381332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992715437401545, "compression_loss": 0.0, "distillation_loss": 0.01990782469511032, "epoch": 6.45, "learning_rate": 7.678181253668343e-07, "loss": 0.0228, "step": 6792, "task_loss": 0.04860430955886841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992728811748139, "compression_loss": 0.0, "distillation_loss": 0.024153951555490494, "epoch": 6.45, "learning_rate": 7.651998685143269e-07, "loss": 0.0354, "step": 6793, "task_loss": 0.13703292608261108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799274216971464, "compression_loss": 0.0, "distillation_loss": 0.02501821331679821, "epoch": 6.45, "learning_rate": 7.625860140299811e-07, "loss": 0.0231, "step": 6794, "task_loss": 0.006082385778427124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992755511311087, "compression_loss": 0.0, "distillation_loss": 0.06825748831033707, "epoch": 6.45, "learning_rate": 7.599765623886146e-07, "loss": 0.0673, "step": 6795, "task_loss": 0.05868315324187279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992768836547516, "compression_loss": 0.0, "distillation_loss": 0.040932249277830124, "epoch": 6.45, "learning_rate": 7.573715140642451e-07, "loss": 0.0558, "step": 6796, "task_loss": 0.18955527245998383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992782145433963, "compression_loss": 0.0, "distillation_loss": 0.03785280883312225, "epoch": 6.45, "learning_rate": 7.547708695300942e-07, "loss": 0.0502, "step": 6797, "task_loss": 0.16129517555236816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992795437980466, "compression_loss": 0.0, "distillation_loss": 0.02329355478286743, "epoch": 6.46, "learning_rate": 7.521746292585841e-07, "loss": 0.0357, "step": 6798, "task_loss": 0.1471756100654602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799280871419706, "compression_loss": 0.0, "distillation_loss": 0.03262361139059067, "epoch": 6.46, "learning_rate": 7.49582793721329e-07, "loss": 0.0336, "step": 6799, "task_loss": 0.04221324622631073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992821974093786, "compression_loss": 0.0, "distillation_loss": 0.03289227932691574, "epoch": 6.46, "learning_rate": 7.469953633891469e-07, "loss": 0.0401, "step": 6800, "task_loss": 0.10484490543603897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992835217680675, "compression_loss": 0.0, "distillation_loss": 0.07555963844060898, "epoch": 6.46, "learning_rate": 7.444123387320645e-07, "loss": 0.0751, "step": 6801, "task_loss": 0.07083379477262497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992848444967768, "compression_loss": 0.0, "distillation_loss": 0.03143364191055298, "epoch": 6.46, "learning_rate": 7.418337202192982e-07, "loss": 0.0313, "step": 6802, "task_loss": 0.03013058938086033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992861655965101, "compression_loss": 0.0, "distillation_loss": 0.03767804428935051, "epoch": 6.46, "learning_rate": 7.392595083192622e-07, "loss": 0.0348, "step": 6803, "task_loss": 0.009084422141313553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992874850682711, "compression_loss": 0.0, "distillation_loss": 0.03183813393115997, "epoch": 6.46, "learning_rate": 7.366897034995796e-07, "loss": 0.0295, "step": 6804, "task_loss": 0.008527562022209167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992888029130634, "compression_loss": 0.0, "distillation_loss": 0.022205591201782227, "epoch": 6.46, "learning_rate": 7.341243062270686e-07, "loss": 0.028, "step": 6805, "task_loss": 0.0802656039595604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992901191318905, "compression_loss": 0.0, "distillation_loss": 0.036016229540109634, "epoch": 6.46, "learning_rate": 7.315633169677399e-07, "loss": 0.0352, "step": 6806, "task_loss": 0.027843181043863297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992914337257565, "compression_loss": 0.0, "distillation_loss": 0.03833125904202461, "epoch": 6.46, "learning_rate": 7.290067361868103e-07, "loss": 0.0388, "step": 6807, "task_loss": 0.0432337149977684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992927466956649, "compression_loss": 0.0, "distillation_loss": 0.01719411462545395, "epoch": 6.47, "learning_rate": 7.264545643486997e-07, "loss": 0.0272, "step": 6808, "task_loss": 0.11720463633537292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992940580426192, "compression_loss": 0.0, "distillation_loss": 0.10508248209953308, "epoch": 6.47, "learning_rate": 7.239068019170209e-07, "loss": 0.1087, "step": 6809, "task_loss": 0.14166167378425598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992953677676233, "compression_loss": 0.0, "distillation_loss": 0.04110556095838547, "epoch": 6.47, "learning_rate": 7.21363449354584e-07, "loss": 0.0467, "step": 6810, "task_loss": 0.09688466787338257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992966758716809, "compression_loss": 0.0, "distillation_loss": 0.0683632344007492, "epoch": 6.47, "learning_rate": 7.188245071234057e-07, "loss": 0.0735, "step": 6811, "task_loss": 0.11992549151182175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7992979823557955, "compression_loss": 0.0, "distillation_loss": 0.03459140658378601, "epoch": 6.47, "learning_rate": 7.162899756846975e-07, "loss": 0.0356, "step": 6812, "task_loss": 0.04442333057522774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799299287220971, "compression_loss": 0.0, "distillation_loss": 0.05156092718243599, "epoch": 6.47, "learning_rate": 7.137598554988633e-07, "loss": 0.0484, "step": 6813, "task_loss": 0.019665861502289772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993005904682109, "compression_loss": 0.0, "distillation_loss": 0.07404929399490356, "epoch": 6.47, "learning_rate": 7.112341470255163e-07, "loss": 0.0795, "step": 6814, "task_loss": 0.12879236042499542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993018920985189, "compression_loss": 0.0, "distillation_loss": 0.055263321846723557, "epoch": 6.47, "learning_rate": 7.087128507234642e-07, "loss": 0.0582, "step": 6815, "task_loss": 0.08427795022726059 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993031921128988, "compression_loss": 0.0, "distillation_loss": 0.06862097978591919, "epoch": 6.47, "learning_rate": 7.061959670507102e-07, "loss": 0.0758, "step": 6816, "task_loss": 0.1407826542854309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993044905123541, "compression_loss": 0.0, "distillation_loss": 0.030200235545635223, "epoch": 6.47, "learning_rate": 7.036834964644523e-07, "loss": 0.0385, "step": 6817, "task_loss": 0.11273118853569031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993057872978887, "compression_loss": 0.0, "distillation_loss": 0.015482475981116295, "epoch": 6.47, "learning_rate": 7.011754394211061e-07, "loss": 0.0144, "step": 6818, "task_loss": 0.004941888153553009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993070824705062, "compression_loss": 0.0, "distillation_loss": 0.05421324819326401, "epoch": 6.48, "learning_rate": 6.986717963762656e-07, "loss": 0.0614, "step": 6819, "task_loss": 0.12607041001319885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993083760312102, "compression_loss": 0.0, "distillation_loss": 0.05300523713231087, "epoch": 6.48, "learning_rate": 6.961725677847308e-07, "loss": 0.0534, "step": 6820, "task_loss": 0.0573628805577755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993096679810044, "compression_loss": 0.0, "distillation_loss": 0.031698837876319885, "epoch": 6.48, "learning_rate": 6.936777541004941e-07, "loss": 0.0293, "step": 6821, "task_loss": 0.007633762434124947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993109583208926, "compression_loss": 0.0, "distillation_loss": 0.053836889564991, "epoch": 6.48, "learning_rate": 6.911873557767568e-07, "loss": 0.0578, "step": 6822, "task_loss": 0.09317664802074432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993122470518784, "compression_loss": 0.0, "distillation_loss": 0.02741752751171589, "epoch": 6.48, "learning_rate": 6.88701373265907e-07, "loss": 0.0269, "step": 6823, "task_loss": 0.022505706176161766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993135341749654, "compression_loss": 0.0, "distillation_loss": 0.05598681792616844, "epoch": 6.48, "learning_rate": 6.862198070195363e-07, "loss": 0.0687, "step": 6824, "task_loss": 0.18340638279914856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993148196911575, "compression_loss": 0.0, "distillation_loss": 0.060910556465387344, "epoch": 6.48, "learning_rate": 6.837426574884342e-07, "loss": 0.0622, "step": 6825, "task_loss": 0.07380431145429611 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993161036014581, "compression_loss": 0.0, "distillation_loss": 0.03215963393449783, "epoch": 6.48, "learning_rate": 6.812699251225907e-07, "loss": 0.0296, "step": 6826, "task_loss": 0.006164673715829849 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993173859068712, "compression_loss": 0.0, "distillation_loss": 0.0726788341999054, "epoch": 6.48, "learning_rate": 6.788016103711825e-07, "loss": 0.0694, "step": 6827, "task_loss": 0.039418820291757584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993186666084002, "compression_loss": 0.0, "distillation_loss": 0.042292624711990356, "epoch": 6.48, "learning_rate": 6.763377136825927e-07, "loss": 0.039, "step": 6828, "task_loss": 0.009175049141049385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993199457070489, "compression_loss": 0.0, "distillation_loss": 0.018559550866484642, "epoch": 6.49, "learning_rate": 6.738782355044049e-07, "loss": 0.0244, "step": 6829, "task_loss": 0.07653959095478058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799321223203821, "compression_loss": 0.0, "distillation_loss": 0.14147916436195374, "epoch": 6.49, "learning_rate": 6.714231762833894e-07, "loss": 0.1379, "step": 6830, "task_loss": 0.10579898953437805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993224990997202, "compression_loss": 0.0, "distillation_loss": 0.02243266999721527, "epoch": 6.49, "learning_rate": 6.689725364655203e-07, "loss": 0.0242, "step": 6831, "task_loss": 0.04005550593137741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993237733957501, "compression_loss": 0.0, "distillation_loss": 0.0320717990398407, "epoch": 6.49, "learning_rate": 6.665263164959745e-07, "loss": 0.049, "step": 6832, "task_loss": 0.20118948817253113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993250460929144, "compression_loss": 0.0, "distillation_loss": 0.02864461950957775, "epoch": 6.49, "learning_rate": 6.640845168191107e-07, "loss": 0.0266, "step": 6833, "task_loss": 0.008286131545901299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993263171922169, "compression_loss": 0.0, "distillation_loss": 0.014984526671469212, "epoch": 6.49, "learning_rate": 6.616471378784961e-07, "loss": 0.014, "step": 6834, "task_loss": 0.0047838687896728516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993275866946612, "compression_loss": 0.0, "distillation_loss": 0.0654832050204277, "epoch": 6.49, "learning_rate": 6.592141801168933e-07, "loss": 0.0741, "step": 6835, "task_loss": 0.15198421478271484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993288546012509, "compression_loss": 0.0, "distillation_loss": 0.030285434797406197, "epoch": 6.49, "learning_rate": 6.567856439762654e-07, "loss": 0.0406, "step": 6836, "task_loss": 0.13304871320724487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993301209129897, "compression_loss": 0.0, "distillation_loss": 0.052962590008974075, "epoch": 6.49, "learning_rate": 6.543615298977623e-07, "loss": 0.0483, "step": 6837, "task_loss": 0.006451290100812912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993313856308815, "compression_loss": 0.0, "distillation_loss": 0.07559768110513687, "epoch": 6.49, "learning_rate": 6.519418383217347e-07, "loss": 0.0742, "step": 6838, "task_loss": 0.061944808810949326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993326487559297, "compression_loss": 0.0, "distillation_loss": 0.042924731969833374, "epoch": 6.49, "learning_rate": 6.495265696877361e-07, "loss": 0.0429, "step": 6839, "task_loss": 0.04244181513786316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993339102891381, "compression_loss": 0.0, "distillation_loss": 0.021773064509034157, "epoch": 6.5, "learning_rate": 6.471157244345105e-07, "loss": 0.0202, "step": 6840, "task_loss": 0.005988283082842827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993351702315104, "compression_loss": 0.0, "distillation_loss": 0.01895114779472351, "epoch": 6.5, "learning_rate": 6.447093029999935e-07, "loss": 0.0179, "step": 6841, "task_loss": 0.008197702467441559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993364285840503, "compression_loss": 0.0, "distillation_loss": 0.06650319695472717, "epoch": 6.5, "learning_rate": 6.423073058213325e-07, "loss": 0.0657, "step": 6842, "task_loss": 0.058126937597990036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993376853477614, "compression_loss": 0.0, "distillation_loss": 0.030550938099622726, "epoch": 6.5, "learning_rate": 6.399097333348536e-07, "loss": 0.0346, "step": 6843, "task_loss": 0.0713091567158699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993389405236474, "compression_loss": 0.0, "distillation_loss": 0.01258410420268774, "epoch": 6.5, "learning_rate": 6.375165859760946e-07, "loss": 0.0116, "step": 6844, "task_loss": 0.003225104883313179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993401941127121, "compression_loss": 0.0, "distillation_loss": 0.04864715039730072, "epoch": 6.5, "learning_rate": 6.351278641797742e-07, "loss": 0.0461, "step": 6845, "task_loss": 0.023460451513528824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799341446115959, "compression_loss": 0.0, "distillation_loss": 0.10957182943820953, "epoch": 6.5, "learning_rate": 6.327435683798233e-07, "loss": 0.1103, "step": 6846, "task_loss": 0.1172788143157959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993426965343919, "compression_loss": 0.0, "distillation_loss": 0.050183918327093124, "epoch": 6.5, "learning_rate": 6.303636990093592e-07, "loss": 0.059, "step": 6847, "task_loss": 0.13854621350765228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993439453690145, "compression_loss": 0.0, "distillation_loss": 0.013963685370981693, "epoch": 6.5, "learning_rate": 6.279882565006889e-07, "loss": 0.0201, "step": 6848, "task_loss": 0.07548151910305023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993451926208305, "compression_loss": 0.0, "distillation_loss": 0.0308124627918005, "epoch": 6.5, "learning_rate": 6.256172412853339e-07, "loss": 0.0313, "step": 6849, "task_loss": 0.0358404815196991 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993464382908434, "compression_loss": 0.0, "distillation_loss": 0.006880389526486397, "epoch": 6.51, "learning_rate": 6.232506537939941e-07, "loss": 0.0065, "step": 6850, "task_loss": 0.00280972383916378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799347682380057, "compression_loss": 0.0, "distillation_loss": 0.03846500813961029, "epoch": 6.51, "learning_rate": 6.208884944565702e-07, "loss": 0.0352, "step": 6851, "task_loss": 0.005746711045503616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993489248894751, "compression_loss": 0.0, "distillation_loss": 0.0295537281781435, "epoch": 6.51, "learning_rate": 6.185307637021631e-07, "loss": 0.0389, "step": 6852, "task_loss": 0.12259507924318314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993501658201012, "compression_loss": 0.0, "distillation_loss": 0.08173021674156189, "epoch": 6.51, "learning_rate": 6.161774619590666e-07, "loss": 0.0784, "step": 6853, "task_loss": 0.048187751322984695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799351405172939, "compression_loss": 0.0, "distillation_loss": 0.10911855846643448, "epoch": 6.51, "learning_rate": 6.138285896547691e-07, "loss": 0.1035, "step": 6854, "task_loss": 0.05309395492076874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993526429489923, "compression_loss": 0.0, "distillation_loss": 0.06665603071451187, "epoch": 6.51, "learning_rate": 6.114841472159516e-07, "loss": 0.0703, "step": 6855, "task_loss": 0.10340512543916702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993538791492647, "compression_loss": 0.0, "distillation_loss": 0.013052813708782196, "epoch": 6.51, "learning_rate": 6.091441350684957e-07, "loss": 0.0121, "step": 6856, "task_loss": 0.003964599221944809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993551137747599, "compression_loss": 0.0, "distillation_loss": 0.030018752440810204, "epoch": 6.51, "learning_rate": 6.068085536374752e-07, "loss": 0.0292, "step": 6857, "task_loss": 0.02144923247396946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993563468264816, "compression_loss": 0.0, "distillation_loss": 0.020197410136461258, "epoch": 6.51, "learning_rate": 6.04477403347159e-07, "loss": 0.0321, "step": 6858, "task_loss": 0.13970831036567688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993575783054334, "compression_loss": 0.0, "distillation_loss": 0.052353113889694214, "epoch": 6.51, "learning_rate": 6.02150684621014e-07, "loss": 0.0531, "step": 6859, "task_loss": 0.059832729399204254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993588082126191, "compression_loss": 0.0, "distillation_loss": 0.04445452615618706, "epoch": 6.51, "learning_rate": 5.998283978816966e-07, "loss": 0.0456, "step": 6860, "task_loss": 0.05596046522259712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993600365490422, "compression_loss": 0.0, "distillation_loss": 0.10008314251899719, "epoch": 6.52, "learning_rate": 5.975105435510637e-07, "loss": 0.1065, "step": 6861, "task_loss": 0.1641978919506073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993612633157067, "compression_loss": 0.0, "distillation_loss": 0.02374127134680748, "epoch": 6.52, "learning_rate": 5.951971220501645e-07, "loss": 0.0222, "step": 6862, "task_loss": 0.008045762777328491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993624885136159, "compression_loss": 0.0, "distillation_loss": 0.04164247214794159, "epoch": 6.52, "learning_rate": 5.928881337992437e-07, "loss": 0.0465, "step": 6863, "task_loss": 0.08977600187063217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993637121437738, "compression_loss": 0.0, "distillation_loss": 0.06360233575105667, "epoch": 6.52, "learning_rate": 5.905835792177406e-07, "loss": 0.088, "step": 6864, "task_loss": 0.30765628814697266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993649342071839, "compression_loss": 0.0, "distillation_loss": 0.03090558759868145, "epoch": 6.52, "learning_rate": 5.882834587242842e-07, "loss": 0.043, "step": 6865, "task_loss": 0.15227064490318298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79936615470485, "compression_loss": 0.0, "distillation_loss": 0.02005811221897602, "epoch": 6.52, "learning_rate": 5.859877727367069e-07, "loss": 0.0293, "step": 6866, "task_loss": 0.11207105964422226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993673736377757, "compression_loss": 0.0, "distillation_loss": 0.06416010856628418, "epoch": 6.52, "learning_rate": 5.836965216720309e-07, "loss": 0.0746, "step": 6867, "task_loss": 0.16828285157680511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993685910069646, "compression_loss": 0.0, "distillation_loss": 0.09754236787557602, "epoch": 6.52, "learning_rate": 5.814097059464702e-07, "loss": 0.0941, "step": 6868, "task_loss": 0.06287850439548492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993698068134205, "compression_loss": 0.0, "distillation_loss": 0.016108129173517227, "epoch": 6.52, "learning_rate": 5.79127325975437e-07, "loss": 0.0156, "step": 6869, "task_loss": 0.010948818176984787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993710210581472, "compression_loss": 0.0, "distillation_loss": 0.04941096901893616, "epoch": 6.52, "learning_rate": 5.768493821735387e-07, "loss": 0.0568, "step": 6870, "task_loss": 0.12317119538784027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799372233742148, "compression_loss": 0.0, "distillation_loss": 0.06292635947465897, "epoch": 6.53, "learning_rate": 5.745758749545749e-07, "loss": 0.0747, "step": 6871, "task_loss": 0.18059583008289337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799373444866427, "compression_loss": 0.0, "distillation_loss": 0.028696933761239052, "epoch": 6.53, "learning_rate": 5.723068047315344e-07, "loss": 0.0358, "step": 6872, "task_loss": 0.09973844140768051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993746544319877, "compression_loss": 0.0, "distillation_loss": 0.10228434950113297, "epoch": 6.53, "learning_rate": 5.7004217191661e-07, "loss": 0.0967, "step": 6873, "task_loss": 0.04644036293029785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993758624398338, "compression_loss": 0.0, "distillation_loss": 0.05492546781897545, "epoch": 6.53, "learning_rate": 5.677819769211807e-07, "loss": 0.0512, "step": 6874, "task_loss": 0.01718452386558056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799377068890969, "compression_loss": 0.0, "distillation_loss": 0.0156172476708889, "epoch": 6.53, "learning_rate": 5.655262201558209e-07, "loss": 0.0151, "step": 6875, "task_loss": 0.010073995217680931 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993782737863969, "compression_loss": 0.0, "distillation_loss": 0.04025673121213913, "epoch": 6.53, "learning_rate": 5.63274902030303e-07, "loss": 0.0425, "step": 6876, "task_loss": 0.06272687762975693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993794771271213, "compression_loss": 0.0, "distillation_loss": 0.11300121247768402, "epoch": 6.53, "learning_rate": 5.610280229535858e-07, "loss": 0.1092, "step": 6877, "task_loss": 0.07530970871448517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993806789141458, "compression_loss": 0.0, "distillation_loss": 0.03029126301407814, "epoch": 6.53, "learning_rate": 5.58785583333829e-07, "loss": 0.0353, "step": 6878, "task_loss": 0.08056721091270447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799381879148474, "compression_loss": 0.0, "distillation_loss": 0.02235785312950611, "epoch": 6.53, "learning_rate": 5.56547583578379e-07, "loss": 0.0225, "step": 6879, "task_loss": 0.023482179269194603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993830778311097, "compression_loss": 0.0, "distillation_loss": 0.026940112933516502, "epoch": 6.53, "learning_rate": 5.543140240937827e-07, "loss": 0.0271, "step": 6880, "task_loss": 0.02874702401459217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993842749630566, "compression_loss": 0.0, "distillation_loss": 0.03313375264406204, "epoch": 6.53, "learning_rate": 5.520849052857768e-07, "loss": 0.0304, "step": 6881, "task_loss": 0.005804285407066345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993854705453185, "compression_loss": 0.0, "distillation_loss": 0.05707018822431564, "epoch": 6.54, "learning_rate": 5.498602275592873e-07, "loss": 0.053, "step": 6882, "task_loss": 0.01648840121924877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993866645788987, "compression_loss": 0.0, "distillation_loss": 0.042155489325523376, "epoch": 6.54, "learning_rate": 5.476399913184438e-07, "loss": 0.0563, "step": 6883, "task_loss": 0.1832885593175888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993878570648013, "compression_loss": 0.0, "distillation_loss": 0.01913582533597946, "epoch": 6.54, "learning_rate": 5.454241969665597e-07, "loss": 0.0177, "step": 6884, "task_loss": 0.005137601867318153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993890480040297, "compression_loss": 0.0, "distillation_loss": 0.08212343603372574, "epoch": 6.54, "learning_rate": 5.432128449061464e-07, "loss": 0.085, "step": 6885, "task_loss": 0.11137494444847107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993902373975876, "compression_loss": 0.0, "distillation_loss": 0.034957289695739746, "epoch": 6.54, "learning_rate": 5.410059355388964e-07, "loss": 0.0336, "step": 6886, "task_loss": 0.020979827269911766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993914252464789, "compression_loss": 0.0, "distillation_loss": 0.026353981345891953, "epoch": 6.54, "learning_rate": 5.388034692657223e-07, "loss": 0.0242, "step": 6887, "task_loss": 0.004370139911770821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799392611551707, "compression_loss": 0.0, "distillation_loss": 0.037230148911476135, "epoch": 6.54, "learning_rate": 5.366054464867016e-07, "loss": 0.0342, "step": 6888, "task_loss": 0.006543133407831192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993937963142759, "compression_loss": 0.0, "distillation_loss": 0.019730228930711746, "epoch": 6.54, "learning_rate": 5.344118676011172e-07, "loss": 0.0349, "step": 6889, "task_loss": 0.17101413011550903 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799394979535189, "compression_loss": 0.0, "distillation_loss": 0.026037897914648056, "epoch": 6.54, "learning_rate": 5.322227330074481e-07, "loss": 0.0328, "step": 6890, "task_loss": 0.09383513033390045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993961612154501, "compression_loss": 0.0, "distillation_loss": 0.029655098915100098, "epoch": 6.54, "learning_rate": 5.300380431033564e-07, "loss": 0.0276, "step": 6891, "task_loss": 0.009574664756655693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993973413560629, "compression_loss": 0.0, "distillation_loss": 0.06856879591941833, "epoch": 6.55, "learning_rate": 5.278577982857025e-07, "loss": 0.0743, "step": 6892, "task_loss": 0.12565253674983978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799398519958031, "compression_loss": 0.0, "distillation_loss": 0.049631062895059586, "epoch": 6.55, "learning_rate": 5.25681998950539e-07, "loss": 0.046, "step": 6893, "task_loss": 0.013276774436235428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7993996970223581, "compression_loss": 0.0, "distillation_loss": 0.05079222470521927, "epoch": 6.55, "learning_rate": 5.235106454931083e-07, "loss": 0.0519, "step": 6894, "task_loss": 0.06189654394984245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799400872550048, "compression_loss": 0.0, "distillation_loss": 0.017339782789349556, "epoch": 6.55, "learning_rate": 5.213437383078501e-07, "loss": 0.0241, "step": 6895, "task_loss": 0.08445850759744644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994020465421043, "compression_loss": 0.0, "distillation_loss": 0.08581778407096863, "epoch": 6.55, "learning_rate": 5.191812777883915e-07, "loss": 0.1178, "step": 6896, "task_loss": 0.40567508339881897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994032189995306, "compression_loss": 0.0, "distillation_loss": 0.02632337063550949, "epoch": 6.55, "learning_rate": 5.170232643275541e-07, "loss": 0.0352, "step": 6897, "task_loss": 0.11460480093955994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994043899233307, "compression_loss": 0.0, "distillation_loss": 0.09149836003780365, "epoch": 6.55, "learning_rate": 5.148696983173551e-07, "loss": 0.0872, "step": 6898, "task_loss": 0.048190876841545105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994055593145083, "compression_loss": 0.0, "distillation_loss": 0.08621042221784592, "epoch": 6.55, "learning_rate": 5.127205801489954e-07, "loss": 0.0784, "step": 6899, "task_loss": 0.0076434426009655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799406727174067, "compression_loss": 0.0, "distillation_loss": 0.03059300407767296, "epoch": 6.55, "learning_rate": 5.105759102128738e-07, "loss": 0.0345, "step": 6900, "task_loss": 0.06991107016801834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994078935030104, "compression_loss": 0.0, "distillation_loss": 0.01730835810303688, "epoch": 6.55, "learning_rate": 5.084356888985814e-07, "loss": 0.0217, "step": 6901, "task_loss": 0.06103728711605072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994090583023424, "compression_loss": 0.0, "distillation_loss": 0.03369038552045822, "epoch": 6.55, "learning_rate": 5.062999165948989e-07, "loss": 0.04, "step": 6902, "task_loss": 0.09683724492788315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994102215730666, "compression_loss": 0.0, "distillation_loss": 0.057407524436712265, "epoch": 6.56, "learning_rate": 5.041685936897966e-07, "loss": 0.0604, "step": 6903, "task_loss": 0.08716972172260284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994113833161866, "compression_loss": 0.0, "distillation_loss": 0.06570513546466827, "epoch": 6.56, "learning_rate": 5.020417205704453e-07, "loss": 0.0642, "step": 6904, "task_loss": 0.0507512241601944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994125435327062, "compression_loss": 0.0, "distillation_loss": 0.017435505986213684, "epoch": 6.56, "learning_rate": 4.999192976231998e-07, "loss": 0.026, "step": 6905, "task_loss": 0.10334107279777527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799413702223629, "compression_loss": 0.0, "distillation_loss": 0.0625772476196289, "epoch": 6.56, "learning_rate": 4.978013252336072e-07, "loss": 0.0609, "step": 6906, "task_loss": 0.04585137218236923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994148593899586, "compression_loss": 0.0, "distillation_loss": 0.07100007683038712, "epoch": 6.56, "learning_rate": 4.956878037864043e-07, "loss": 0.0801, "step": 6907, "task_loss": 0.1624176800251007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994160150326988, "compression_loss": 0.0, "distillation_loss": 0.0802350789308548, "epoch": 6.56, "learning_rate": 4.935787336655285e-07, "loss": 0.0845, "step": 6908, "task_loss": 0.12317005544900894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994171691528533, "compression_loss": 0.0, "distillation_loss": 0.024071305990219116, "epoch": 6.56, "learning_rate": 4.914741152541008e-07, "loss": 0.0222, "step": 6909, "task_loss": 0.005024924874305725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994183217514258, "compression_loss": 0.0, "distillation_loss": 0.10285592824220657, "epoch": 6.56, "learning_rate": 4.893739489344323e-07, "loss": 0.1003, "step": 6910, "task_loss": 0.07742301374673843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994194728294198, "compression_loss": 0.0, "distillation_loss": 0.0873519778251648, "epoch": 6.56, "learning_rate": 4.872782350880317e-07, "loss": 0.0973, "step": 6911, "task_loss": 0.18709777295589447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994206223878393, "compression_loss": 0.0, "distillation_loss": 0.02852596715092659, "epoch": 6.56, "learning_rate": 4.851869740955944e-07, "loss": 0.0342, "step": 6912, "task_loss": 0.08523042500019073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994217704276876, "compression_loss": 0.0, "distillation_loss": 0.15731999278068542, "epoch": 6.57, "learning_rate": 4.831001663370083e-07, "loss": 0.1699, "step": 6913, "task_loss": 0.2830032706260681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994229169499687, "compression_loss": 0.0, "distillation_loss": 0.04235370457172394, "epoch": 6.57, "learning_rate": 4.810178121913478e-07, "loss": 0.0412, "step": 6914, "task_loss": 0.030333248898386955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994240619556862, "compression_loss": 0.0, "distillation_loss": 0.07360881567001343, "epoch": 6.57, "learning_rate": 4.78939912036891e-07, "loss": 0.0884, "step": 6915, "task_loss": 0.22195890545845032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994252054458436, "compression_loss": 0.0, "distillation_loss": 0.017446476966142654, "epoch": 6.57, "learning_rate": 4.768664662510941e-07, "loss": 0.016, "step": 6916, "task_loss": 0.0031855106353759766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994263474214448, "compression_loss": 0.0, "distillation_loss": 0.023602165281772614, "epoch": 6.57, "learning_rate": 4.7479747521060324e-07, "loss": 0.0265, "step": 6917, "task_loss": 0.05219149589538574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994274878834934, "compression_loss": 0.0, "distillation_loss": 0.09219710528850555, "epoch": 6.57, "learning_rate": 4.727329392912705e-07, "loss": 0.0911, "step": 6918, "task_loss": 0.08144529163837433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799428626832993, "compression_loss": 0.0, "distillation_loss": 0.08491960167884827, "epoch": 6.57, "learning_rate": 4.706728588681236e-07, "loss": 0.0811, "step": 6919, "task_loss": 0.04681730642914772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994297642709475, "compression_loss": 0.0, "distillation_loss": 0.10116090625524521, "epoch": 6.57, "learning_rate": 4.6861723431538276e-07, "loss": 0.0987, "step": 6920, "task_loss": 0.0767616331577301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994309001983604, "compression_loss": 0.0, "distillation_loss": 0.012731763534247875, "epoch": 6.57, "learning_rate": 4.665660660064686e-07, "loss": 0.0123, "step": 6921, "task_loss": 0.008857103064656258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994320346162354, "compression_loss": 0.0, "distillation_loss": 0.02278589829802513, "epoch": 6.57, "learning_rate": 4.6451935431398306e-07, "loss": 0.021, "step": 6922, "task_loss": 0.004962848499417305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994331675255762, "compression_loss": 0.0, "distillation_loss": 0.04454980418086052, "epoch": 6.57, "learning_rate": 4.6247709960972053e-07, "loss": 0.0438, "step": 6923, "task_loss": 0.03726205229759216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994342989273865, "compression_loss": 0.0, "distillation_loss": 0.07946009188890457, "epoch": 6.58, "learning_rate": 4.604393022646647e-07, "loss": 0.0817, "step": 6924, "task_loss": 0.10193922370672226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79943542882267, "compression_loss": 0.0, "distillation_loss": 0.03449847549200058, "epoch": 6.58, "learning_rate": 4.584059626489973e-07, "loss": 0.033, "step": 6925, "task_loss": 0.01942656934261322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994365572124305, "compression_loss": 0.0, "distillation_loss": 0.027452457696199417, "epoch": 6.58, "learning_rate": 4.5637708113207834e-07, "loss": 0.0329, "step": 6926, "task_loss": 0.08229565620422363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994376840976714, "compression_loss": 0.0, "distillation_loss": 0.014757486060261726, "epoch": 6.58, "learning_rate": 4.5435265808246585e-07, "loss": 0.0137, "step": 6927, "task_loss": 0.0037736501544713974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994388094793965, "compression_loss": 0.0, "distillation_loss": 0.07772176712751389, "epoch": 6.58, "learning_rate": 4.5233269386790734e-07, "loss": 0.0742, "step": 6928, "task_loss": 0.042066704481840134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994399333586095, "compression_loss": 0.0, "distillation_loss": 0.08422841131687164, "epoch": 6.58, "learning_rate": 4.5031718885533423e-07, "loss": 0.083, "step": 6929, "task_loss": 0.07235755026340485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994410557363142, "compression_loss": 0.0, "distillation_loss": 0.04439088702201843, "epoch": 6.58, "learning_rate": 4.483061434108815e-07, "loss": 0.0417, "step": 6930, "task_loss": 0.017775850370526314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799442176613514, "compression_loss": 0.0, "distillation_loss": 0.03447749838232994, "epoch": 6.58, "learning_rate": 4.462995578998569e-07, "loss": 0.058, "step": 6931, "task_loss": 0.2692142724990845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994432959912129, "compression_loss": 0.0, "distillation_loss": 0.06817720085382462, "epoch": 6.58, "learning_rate": 4.4429743268676884e-07, "loss": 0.0724, "step": 6932, "task_loss": 0.11086969822645187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994444138704144, "compression_loss": 0.0, "distillation_loss": 0.05889270454645157, "epoch": 6.58, "learning_rate": 4.4229976813531806e-07, "loss": 0.057, "step": 6933, "task_loss": 0.040453821420669556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994455302521222, "compression_loss": 0.0, "distillation_loss": 0.02058131992816925, "epoch": 6.58, "learning_rate": 4.403065646083809e-07, "loss": 0.019, "step": 6934, "task_loss": 0.00488920696079731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79944664513734, "compression_loss": 0.0, "distillation_loss": 0.03927480801939964, "epoch": 6.59, "learning_rate": 4.383178224680401e-07, "loss": 0.0447, "step": 6935, "task_loss": 0.09392157196998596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994477585270715, "compression_loss": 0.0, "distillation_loss": 0.04294563829898834, "epoch": 6.59, "learning_rate": 4.3633354207555653e-07, "loss": 0.0536, "step": 6936, "task_loss": 0.1493779420852661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994488704223204, "compression_loss": 0.0, "distillation_loss": 0.015475824475288391, "epoch": 6.59, "learning_rate": 4.3435372379138085e-07, "loss": 0.0143, "step": 6937, "task_loss": 0.0033792592585086823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994499808240902, "compression_loss": 0.0, "distillation_loss": 0.017580023035407066, "epoch": 6.59, "learning_rate": 4.3237836797516417e-07, "loss": 0.0163, "step": 6938, "task_loss": 0.004325071349740028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994510897333849, "compression_loss": 0.0, "distillation_loss": 0.024261541664600372, "epoch": 6.59, "learning_rate": 4.304074749857362e-07, "loss": 0.0256, "step": 6939, "task_loss": 0.037375908344984055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799452197151208, "compression_loss": 0.0, "distillation_loss": 0.010520851239562035, "epoch": 6.59, "learning_rate": 4.284410451811188e-07, "loss": 0.0194, "step": 6940, "task_loss": 0.09917841851711273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994533030785631, "compression_loss": 0.0, "distillation_loss": 0.04114697501063347, "epoch": 6.59, "learning_rate": 4.2647907891852357e-07, "loss": 0.0494, "step": 6941, "task_loss": 0.12371751666069031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994544075164539, "compression_loss": 0.0, "distillation_loss": 0.02658040262758732, "epoch": 6.59, "learning_rate": 4.2452157655435145e-07, "loss": 0.0263, "step": 6942, "task_loss": 0.023485524579882622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994555104658843, "compression_loss": 0.0, "distillation_loss": 0.01725972816348076, "epoch": 6.59, "learning_rate": 4.225685384441902e-07, "loss": 0.0161, "step": 6943, "task_loss": 0.005400681868195534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994566119278578, "compression_loss": 0.0, "distillation_loss": 0.03645411878824234, "epoch": 6.59, "learning_rate": 4.2061996494282e-07, "loss": 0.0406, "step": 6944, "task_loss": 0.07785525172948837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994577119033781, "compression_loss": 0.0, "distillation_loss": 0.0437319241464138, "epoch": 6.6, "learning_rate": 4.1867585640421036e-07, "loss": 0.0513, "step": 6945, "task_loss": 0.11954252421855927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994588103934489, "compression_loss": 0.0, "distillation_loss": 0.040995851159095764, "epoch": 6.6, "learning_rate": 4.16736213181515e-07, "loss": 0.0448, "step": 6946, "task_loss": 0.0789896547794342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994599073990739, "compression_loss": 0.0, "distillation_loss": 0.0845775455236435, "epoch": 6.6, "learning_rate": 4.148010356270826e-07, "loss": 0.0876, "step": 6947, "task_loss": 0.11447571963071823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994610029212568, "compression_loss": 0.0, "distillation_loss": 0.019938606768846512, "epoch": 6.6, "learning_rate": 4.128703240924431e-07, "loss": 0.0185, "step": 6948, "task_loss": 0.00549742765724659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994620969610012, "compression_loss": 0.0, "distillation_loss": 0.01663760282099247, "epoch": 6.6, "learning_rate": 4.109440789283242e-07, "loss": 0.0323, "step": 6949, "task_loss": 0.17326664924621582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994631895193108, "compression_loss": 0.0, "distillation_loss": 0.049342524260282516, "epoch": 6.6, "learning_rate": 4.0902230048463495e-07, "loss": 0.0508, "step": 6950, "task_loss": 0.0635058730840683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994642805971894, "compression_loss": 0.0, "distillation_loss": 0.06009483337402344, "epoch": 6.6, "learning_rate": 4.071049891104739e-07, "loss": 0.0572, "step": 6951, "task_loss": 0.031034225597977638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994653701956406, "compression_loss": 0.0, "distillation_loss": 0.03527653217315674, "epoch": 6.6, "learning_rate": 4.0519214515413463e-07, "loss": 0.0381, "step": 6952, "task_loss": 0.06350395828485489 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799466458315668, "compression_loss": 0.0, "distillation_loss": 0.056782066822052, "epoch": 6.6, "learning_rate": 4.0328376896309473e-07, "loss": 0.0651, "step": 6953, "task_loss": 0.13970398902893066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994675449582754, "compression_loss": 0.0, "distillation_loss": 0.08646276593208313, "epoch": 6.6, "learning_rate": 4.01379860884013e-07, "loss": 0.0818, "step": 6954, "task_loss": 0.040067270398139954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994686301244665, "compression_loss": 0.0, "distillation_loss": 0.08508466184139252, "epoch": 6.6, "learning_rate": 3.994804212627462e-07, "loss": 0.0789, "step": 6955, "task_loss": 0.02337714470922947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994697138152448, "compression_loss": 0.0, "distillation_loss": 0.04727325588464737, "epoch": 6.61, "learning_rate": 3.975854504443433e-07, "loss": 0.0431, "step": 6956, "task_loss": 0.005318284034729004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994707960316142, "compression_loss": 0.0, "distillation_loss": 0.025855958461761475, "epoch": 6.61, "learning_rate": 3.956949487730288e-07, "loss": 0.0347, "step": 6957, "task_loss": 0.11433203518390656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994718767745782, "compression_loss": 0.0, "distillation_loss": 0.060321077704429626, "epoch": 6.61, "learning_rate": 3.9380891659221986e-07, "loss": 0.0617, "step": 6958, "task_loss": 0.07363105565309525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994729560451407, "compression_loss": 0.0, "distillation_loss": 0.034057214856147766, "epoch": 6.61, "learning_rate": 3.9192735424452843e-07, "loss": 0.0396, "step": 6959, "task_loss": 0.08952410519123077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994740338443052, "compression_loss": 0.0, "distillation_loss": 0.022792372852563858, "epoch": 6.61, "learning_rate": 3.900502620717478e-07, "loss": 0.0222, "step": 6960, "task_loss": 0.017220191657543182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994751101730754, "compression_loss": 0.0, "distillation_loss": 0.02029343880712986, "epoch": 6.61, "learning_rate": 3.881776404148552e-07, "loss": 0.0242, "step": 6961, "task_loss": 0.059222202748060226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799476185032455, "compression_loss": 0.0, "distillation_loss": 0.026080617681145668, "epoch": 6.61, "learning_rate": 3.8630948961403125e-07, "loss": 0.0244, "step": 6962, "task_loss": 0.00916319526731968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994772584234476, "compression_loss": 0.0, "distillation_loss": 0.02705274149775505, "epoch": 6.61, "learning_rate": 3.844458100086268e-07, "loss": 0.0329, "step": 6963, "task_loss": 0.08544865250587463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994783303470572, "compression_loss": 0.0, "distillation_loss": 0.02477836422622204, "epoch": 6.61, "learning_rate": 3.8258660193719044e-07, "loss": 0.0374, "step": 6964, "task_loss": 0.15073972940444946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994794008042871, "compression_loss": 0.0, "distillation_loss": 0.022429198026657104, "epoch": 6.61, "learning_rate": 3.8073186573745757e-07, "loss": 0.0208, "step": 6965, "task_loss": 0.006342481821775436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994804697961412, "compression_loss": 0.0, "distillation_loss": 0.08988398313522339, "epoch": 6.62, "learning_rate": 3.7888160174634757e-07, "loss": 0.0898, "step": 6966, "task_loss": 0.08916927129030228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994815373236231, "compression_loss": 0.0, "distillation_loss": 0.028484515845775604, "epoch": 6.62, "learning_rate": 3.7703581029997215e-07, "loss": 0.0278, "step": 6967, "task_loss": 0.022118881344795227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994826033877366, "compression_loss": 0.0, "distillation_loss": 0.0302952341735363, "epoch": 6.62, "learning_rate": 3.75194491733627e-07, "loss": 0.028, "step": 6968, "task_loss": 0.0076552219688892365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994836679894852, "compression_loss": 0.0, "distillation_loss": 0.03270921856164932, "epoch": 6.62, "learning_rate": 3.733576463817973e-07, "loss": 0.0308, "step": 6969, "task_loss": 0.013129375874996185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994847311298727, "compression_loss": 0.0, "distillation_loss": 0.052680712193250656, "epoch": 6.62, "learning_rate": 3.71525274578155e-07, "loss": 0.0622, "step": 6970, "task_loss": 0.1481049656867981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994857928099028, "compression_loss": 0.0, "distillation_loss": 0.028156310319900513, "epoch": 6.62, "learning_rate": 3.696973766555589e-07, "loss": 0.0263, "step": 6971, "task_loss": 0.009480351582169533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994868530305791, "compression_loss": 0.0, "distillation_loss": 0.036262333393096924, "epoch": 6.62, "learning_rate": 3.6787395294605455e-07, "loss": 0.0339, "step": 6972, "task_loss": 0.012746721506118774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994879117929053, "compression_loss": 0.0, "distillation_loss": 0.08903949707746506, "epoch": 6.62, "learning_rate": 3.660550037808741e-07, "loss": 0.1021, "step": 6973, "task_loss": 0.21985527873039246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994889690978851, "compression_loss": 0.0, "distillation_loss": 0.03600417822599411, "epoch": 6.62, "learning_rate": 3.642405294904422e-07, "loss": 0.0337, "step": 6974, "task_loss": 0.01251133345067501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994900249465222, "compression_loss": 0.0, "distillation_loss": 0.02271505631506443, "epoch": 6.62, "learning_rate": 3.624305304043646e-07, "loss": 0.0211, "step": 6975, "task_loss": 0.006729325279593468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994910793398202, "compression_loss": 0.0, "distillation_loss": 0.04386524111032486, "epoch": 6.62, "learning_rate": 3.606250068514394e-07, "loss": 0.048, "step": 6976, "task_loss": 0.08489131927490234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799492132278783, "compression_loss": 0.0, "distillation_loss": 0.03540867939591408, "epoch": 6.63, "learning_rate": 3.5882395915964315e-07, "loss": 0.0324, "step": 6977, "task_loss": 0.005433699116110802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799493183764414, "compression_loss": 0.0, "distillation_loss": 0.04147842526435852, "epoch": 6.63, "learning_rate": 3.570273876561475e-07, "loss": 0.0387, "step": 6978, "task_loss": 0.013455674052238464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994942337977171, "compression_loss": 0.0, "distillation_loss": 0.043742306530475616, "epoch": 6.63, "learning_rate": 3.552352926673136e-07, "loss": 0.0518, "step": 6979, "task_loss": 0.12435872107744217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994952823796958, "compression_loss": 0.0, "distillation_loss": 0.04596320539712906, "epoch": 6.63, "learning_rate": 3.5344767451867545e-07, "loss": 0.0448, "step": 6980, "task_loss": 0.03420111536979675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799496329511354, "compression_loss": 0.0, "distillation_loss": 0.09754104167222977, "epoch": 6.63, "learning_rate": 3.51664533534965e-07, "loss": 0.1004, "step": 6981, "task_loss": 0.1259613186120987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994973751936951, "compression_loss": 0.0, "distillation_loss": 0.014837587252259254, "epoch": 6.63, "learning_rate": 3.498858700401009e-07, "loss": 0.0224, "step": 6982, "task_loss": 0.09078174829483032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799498419427723, "compression_loss": 0.0, "distillation_loss": 0.07896040380001068, "epoch": 6.63, "learning_rate": 3.481116843571858e-07, "loss": 0.0744, "step": 6983, "task_loss": 0.03332065790891647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7994994622144413, "compression_loss": 0.0, "distillation_loss": 0.05292394757270813, "epoch": 6.63, "learning_rate": 3.463419768085091e-07, "loss": 0.0551, "step": 6984, "task_loss": 0.07501198351383209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995005035548538, "compression_loss": 0.0, "distillation_loss": 0.019998980686068535, "epoch": 6.63, "learning_rate": 3.4457674771554425e-07, "loss": 0.0186, "step": 6985, "task_loss": 0.005534190684556961 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799501543449964, "compression_loss": 0.0, "distillation_loss": 0.03253600001335144, "epoch": 6.63, "learning_rate": 3.428159973989542e-07, "loss": 0.0317, "step": 6986, "task_loss": 0.024117592722177505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995025819007757, "compression_loss": 0.0, "distillation_loss": 0.02256142720580101, "epoch": 6.64, "learning_rate": 3.4105972617859136e-07, "loss": 0.0233, "step": 6987, "task_loss": 0.029686935245990753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995036189082926, "compression_loss": 0.0, "distillation_loss": 0.02714642882347107, "epoch": 6.64, "learning_rate": 3.3930793437348675e-07, "loss": 0.0258, "step": 6988, "task_loss": 0.013444697484374046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995046544735183, "compression_loss": 0.0, "distillation_loss": 0.08077394962310791, "epoch": 6.64, "learning_rate": 3.3756062230186067e-07, "loss": 0.0878, "step": 6989, "task_loss": 0.15142491459846497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995056885974564, "compression_loss": 0.0, "distillation_loss": 0.10645799338817596, "epoch": 6.64, "learning_rate": 3.358177902811233e-07, "loss": 0.1093, "step": 6990, "task_loss": 0.134621262550354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995067212811109, "compression_loss": 0.0, "distillation_loss": 0.020988423377275467, "epoch": 6.64, "learning_rate": 3.340794386278712e-07, "loss": 0.0284, "step": 6991, "task_loss": 0.094853974878788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995077525254851, "compression_loss": 0.0, "distillation_loss": 0.05726141855120659, "epoch": 6.64, "learning_rate": 3.3234556765787963e-07, "loss": 0.0633, "step": 6992, "task_loss": 0.1174088567495346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799508782331583, "compression_loss": 0.0, "distillation_loss": 0.05345267802476883, "epoch": 6.64, "learning_rate": 3.306161776861161e-07, "loss": 0.0579, "step": 6993, "task_loss": 0.0983385294675827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799509810700408, "compression_loss": 0.0, "distillation_loss": 0.0895034670829773, "epoch": 6.64, "learning_rate": 3.288912690267348e-07, "loss": 0.0974, "step": 6994, "task_loss": 0.16841556131839752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799510837632964, "compression_loss": 0.0, "distillation_loss": 0.020515451207756996, "epoch": 6.64, "learning_rate": 3.2717084199307134e-07, "loss": 0.0191, "step": 6995, "task_loss": 0.006037186831235886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995118631302546, "compression_loss": 0.0, "distillation_loss": 0.02419309876859188, "epoch": 6.64, "learning_rate": 3.2545489689764784e-07, "loss": 0.0414, "step": 6996, "task_loss": 0.19664430618286133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995128871932835, "compression_loss": 0.0, "distillation_loss": 0.0171881765127182, "epoch": 6.64, "learning_rate": 3.237434340521789e-07, "loss": 0.0199, "step": 6997, "task_loss": 0.044096946716308594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995139098230544, "compression_loss": 0.0, "distillation_loss": 0.07256430387496948, "epoch": 6.65, "learning_rate": 3.220364537675574e-07, "loss": 0.0694, "step": 6998, "task_loss": 0.04073936119675636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995149310205709, "compression_loss": 0.0, "distillation_loss": 0.07214680314064026, "epoch": 6.65, "learning_rate": 3.203339563538632e-07, "loss": 0.067, "step": 6999, "task_loss": 0.021014513447880745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995159507868367, "compression_loss": 0.0, "distillation_loss": 0.03768681734800339, "epoch": 6.65, "learning_rate": 3.1863594212036274e-07, "loss": 0.0393, "step": 7000, "task_loss": 0.053721100091934204 }, { "epoch": 6.65, "eval_accuracy": 0.8922018348623854, "eval_loss": 0.4350743889808655, "eval_runtime": 18.1267, "eval_samples_per_second": 48.106, "eval_steps_per_second": 6.013, "step": 7000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995169691228556, "compression_loss": 0.0, "distillation_loss": 0.049745362251996994, "epoch": 6.65, "learning_rate": 3.1694241137551203e-07, "loss": 0.0466, "step": 7001, "task_loss": 0.018015502020716667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995179860296312, "compression_loss": 0.0, "distillation_loss": 0.029364963993430138, "epoch": 6.65, "learning_rate": 3.1525336442694843e-07, "loss": 0.0338, "step": 7002, "task_loss": 0.07397031784057617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995190015081671, "compression_loss": 0.0, "distillation_loss": 0.02187550999224186, "epoch": 6.65, "learning_rate": 3.1356880158149025e-07, "loss": 0.0202, "step": 7003, "task_loss": 0.0046534184366464615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995200155594672, "compression_loss": 0.0, "distillation_loss": 0.013252021744847298, "epoch": 6.65, "learning_rate": 3.118887231451539e-07, "loss": 0.0126, "step": 7004, "task_loss": 0.006522929295897484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995210281845349, "compression_loss": 0.0, "distillation_loss": 0.039753131568431854, "epoch": 6.65, "learning_rate": 3.1021312942313144e-07, "loss": 0.0451, "step": 7005, "task_loss": 0.09337079524993896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799522039384374, "compression_loss": 0.0, "distillation_loss": 0.11256477236747742, "epoch": 6.65, "learning_rate": 3.0854202071979865e-07, "loss": 0.1108, "step": 7006, "task_loss": 0.09483619034290314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995230491599884, "compression_loss": 0.0, "distillation_loss": 0.024704527109861374, "epoch": 6.65, "learning_rate": 3.0687539733872115e-07, "loss": 0.0228, "step": 7007, "task_loss": 0.006121648475527763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995240575123814, "compression_loss": 0.0, "distillation_loss": 0.030473047867417336, "epoch": 6.66, "learning_rate": 3.05213259582654e-07, "loss": 0.0289, "step": 7008, "task_loss": 0.015124401077628136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799525064442557, "compression_loss": 0.0, "distillation_loss": 0.021761227399110794, "epoch": 6.66, "learning_rate": 3.035556077535306e-07, "loss": 0.0294, "step": 7009, "task_loss": 0.09853780269622803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995260699515188, "compression_loss": 0.0, "distillation_loss": 0.03172404319047928, "epoch": 6.66, "learning_rate": 3.0190244215246857e-07, "loss": 0.0289, "step": 7010, "task_loss": 0.003925886005163193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995270740402703, "compression_loss": 0.0, "distillation_loss": 0.062315717339515686, "epoch": 6.66, "learning_rate": 3.002537630797747e-07, "loss": 0.0584, "step": 7011, "task_loss": 0.02332870475947857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995280767098154, "compression_loss": 0.0, "distillation_loss": 0.03596599027514458, "epoch": 6.66, "learning_rate": 2.986095708349429e-07, "loss": 0.034, "step": 7012, "task_loss": 0.01661401055753231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995290779611577, "compression_loss": 0.0, "distillation_loss": 0.03480362519621849, "epoch": 6.66, "learning_rate": 2.9696986571664253e-07, "loss": 0.0327, "step": 7013, "task_loss": 0.013543521985411644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995300777953008, "compression_loss": 0.0, "distillation_loss": 0.024172749370336533, "epoch": 6.66, "learning_rate": 2.953346480227409e-07, "loss": 0.0245, "step": 7014, "task_loss": 0.027264190837740898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995310762132486, "compression_loss": 0.0, "distillation_loss": 0.02965361624956131, "epoch": 6.66, "learning_rate": 2.937039180502782e-07, "loss": 0.0448, "step": 7015, "task_loss": 0.18084561824798584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995320732160046, "compression_loss": 0.0, "distillation_loss": 0.0646897479891777, "epoch": 6.66, "learning_rate": 2.920776760954813e-07, "loss": 0.0833, "step": 7016, "task_loss": 0.2510213255882263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995330688045724, "compression_loss": 0.0, "distillation_loss": 0.03607410565018654, "epoch": 6.66, "learning_rate": 2.904559224537723e-07, "loss": 0.0336, "step": 7017, "task_loss": 0.011309439316391945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799534062979956, "compression_loss": 0.0, "distillation_loss": 0.0305813979357481, "epoch": 6.66, "learning_rate": 2.888386574197488e-07, "loss": 0.0356, "step": 7018, "task_loss": 0.08079958707094193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995350557431589, "compression_loss": 0.0, "distillation_loss": 0.022271297872066498, "epoch": 6.67, "learning_rate": 2.872258812871925e-07, "loss": 0.0463, "step": 7019, "task_loss": 0.26298779249191284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995360470951847, "compression_loss": 0.0, "distillation_loss": 0.03892994672060013, "epoch": 6.67, "learning_rate": 2.8561759434907185e-07, "loss": 0.0363, "step": 7020, "task_loss": 0.012905815616250038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799537037037037, "compression_loss": 0.0, "distillation_loss": 0.02603466808795929, "epoch": 6.67, "learning_rate": 2.84013796897542e-07, "loss": 0.0408, "step": 7021, "task_loss": 0.17337138950824738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995380255697199, "compression_loss": 0.0, "distillation_loss": 0.024577513337135315, "epoch": 6.67, "learning_rate": 2.8241448922393945e-07, "loss": 0.0225, "step": 7022, "task_loss": 0.003398273140192032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995390126942367, "compression_loss": 0.0, "distillation_loss": 0.048275694251060486, "epoch": 6.67, "learning_rate": 2.8081967161878443e-07, "loss": 0.053, "step": 7023, "task_loss": 0.09526211768388748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995399984115912, "compression_loss": 0.0, "distillation_loss": 0.029721971601247787, "epoch": 6.67, "learning_rate": 2.7922934437178695e-07, "loss": 0.043, "step": 7024, "task_loss": 0.16234955191612244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995409827227872, "compression_loss": 0.0, "distillation_loss": 0.04240930825471878, "epoch": 6.67, "learning_rate": 2.7764350777183533e-07, "loss": 0.0402, "step": 7025, "task_loss": 0.02065758965909481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995419656288282, "compression_loss": 0.0, "distillation_loss": 0.08198815584182739, "epoch": 6.67, "learning_rate": 2.760621621070047e-07, "loss": 0.0865, "step": 7026, "task_loss": 0.126779243350029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995429471307178, "compression_loss": 0.0, "distillation_loss": 0.022945459932088852, "epoch": 6.67, "learning_rate": 2.744853076645515e-07, "loss": 0.0216, "step": 7027, "task_loss": 0.009481718763709068 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79954392722946, "compression_loss": 0.0, "distillation_loss": 0.0208904929459095, "epoch": 6.67, "learning_rate": 2.729129447309242e-07, "loss": 0.0194, "step": 7028, "task_loss": 0.006270444020628929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995449059260583, "compression_loss": 0.0, "distillation_loss": 0.05013927444815636, "epoch": 6.68, "learning_rate": 2.713450735917472e-07, "loss": 0.0459, "step": 7029, "task_loss": 0.007837006822228432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995458832215164, "compression_loss": 0.0, "distillation_loss": 0.04295579344034195, "epoch": 6.68, "learning_rate": 2.6978169453183153e-07, "loss": 0.0415, "step": 7030, "task_loss": 0.028465719893574715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799546859116838, "compression_loss": 0.0, "distillation_loss": 0.03028765879571438, "epoch": 6.68, "learning_rate": 2.6822280783517504e-07, "loss": 0.0279, "step": 7031, "task_loss": 0.006683392450213432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995478336130267, "compression_loss": 0.0, "distillation_loss": 0.025525229051709175, "epoch": 6.68, "learning_rate": 2.66668413784954e-07, "loss": 0.0238, "step": 7032, "task_loss": 0.008154524490237236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995488067110863, "compression_loss": 0.0, "distillation_loss": 0.020436950027942657, "epoch": 6.68, "learning_rate": 2.651185126635314e-07, "loss": 0.0213, "step": 7033, "task_loss": 0.029449906200170517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995497784120204, "compression_loss": 0.0, "distillation_loss": 0.020888283848762512, "epoch": 6.68, "learning_rate": 2.635731047524542e-07, "loss": 0.0276, "step": 7034, "task_loss": 0.08772215992212296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995507487168327, "compression_loss": 0.0, "distillation_loss": 0.057622358202934265, "epoch": 6.68, "learning_rate": 2.620321903324563e-07, "loss": 0.0553, "step": 7035, "task_loss": 0.034089043736457825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995517176265269, "compression_loss": 0.0, "distillation_loss": 0.021918507292866707, "epoch": 6.68, "learning_rate": 2.6049576968345256e-07, "loss": 0.0204, "step": 7036, "task_loss": 0.006641771644353867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995526851421066, "compression_loss": 0.0, "distillation_loss": 0.01303351204842329, "epoch": 6.68, "learning_rate": 2.589638430845337e-07, "loss": 0.0124, "step": 7037, "task_loss": 0.00620032474398613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995536512645757, "compression_loss": 0.0, "distillation_loss": 0.04416292533278465, "epoch": 6.68, "learning_rate": 2.5743641081399094e-07, "loss": 0.0467, "step": 7038, "task_loss": 0.0692160353064537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995546159949376, "compression_loss": 0.0, "distillation_loss": 0.044526465237140656, "epoch": 6.68, "learning_rate": 2.559134731492857e-07, "loss": 0.0502, "step": 7039, "task_loss": 0.10125073045492172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995555793341962, "compression_loss": 0.0, "distillation_loss": 0.06591819226741791, "epoch": 6.69, "learning_rate": 2.5439503036706615e-07, "loss": 0.0642, "step": 7040, "task_loss": 0.04842384532094002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995565412833551, "compression_loss": 0.0, "distillation_loss": 0.04874798282980919, "epoch": 6.69, "learning_rate": 2.5288108274316435e-07, "loss": 0.0493, "step": 7041, "task_loss": 0.05393533781170845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799557501843418, "compression_loss": 0.0, "distillation_loss": 0.05359012633562088, "epoch": 6.69, "learning_rate": 2.5137163055259926e-07, "loss": 0.0642, "step": 7042, "task_loss": 0.15967698395252228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995584610153885, "compression_loss": 0.0, "distillation_loss": 0.20667728781700134, "epoch": 6.69, "learning_rate": 2.4986667406956544e-07, "loss": 0.2091, "step": 7043, "task_loss": 0.23065844178199768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995594188002704, "compression_loss": 0.0, "distillation_loss": 0.06716637313365936, "epoch": 6.69, "learning_rate": 2.4836621356744973e-07, "loss": 0.0693, "step": 7044, "task_loss": 0.08879762887954712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995603751990673, "compression_loss": 0.0, "distillation_loss": 0.07034559547901154, "epoch": 6.69, "learning_rate": 2.468702493188174e-07, "loss": 0.0752, "step": 7045, "task_loss": 0.1193457618355751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799561330212783, "compression_loss": 0.0, "distillation_loss": 0.02908926270902157, "epoch": 6.69, "learning_rate": 2.453787815954178e-07, "loss": 0.0302, "step": 7046, "task_loss": 0.03969724103808403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995622838424209, "compression_loss": 0.0, "distillation_loss": 0.07951683551073074, "epoch": 6.69, "learning_rate": 2.4389181066817865e-07, "loss": 0.0891, "step": 7047, "task_loss": 0.17507222294807434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799563236088985, "compression_loss": 0.0, "distillation_loss": 0.009979777969419956, "epoch": 6.69, "learning_rate": 2.4240933680722e-07, "loss": 0.0095, "step": 7048, "task_loss": 0.004972290247678757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995641869534789, "compression_loss": 0.0, "distillation_loss": 0.014456565491855145, "epoch": 6.69, "learning_rate": 2.4093136028184024e-07, "loss": 0.0133, "step": 7049, "task_loss": 0.003056548535823822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995651364369062, "compression_loss": 0.0, "distillation_loss": 0.06412947177886963, "epoch": 6.7, "learning_rate": 2.394578813605192e-07, "loss": 0.0856, "step": 7050, "task_loss": 0.2784784734249115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995660845402706, "compression_loss": 0.0, "distillation_loss": 0.048369936645030975, "epoch": 6.7, "learning_rate": 2.3798890031092037e-07, "loss": 0.0595, "step": 7051, "task_loss": 0.15925872325897217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995670312645757, "compression_loss": 0.0, "distillation_loss": 0.039365239441394806, "epoch": 6.7, "learning_rate": 2.3652441739989427e-07, "loss": 0.0365, "step": 7052, "task_loss": 0.010832777246832848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995679766108255, "compression_loss": 0.0, "distillation_loss": 0.08087919652462006, "epoch": 6.7, "learning_rate": 2.3506443289347259e-07, "loss": 0.0776, "step": 7053, "task_loss": 0.04851894453167915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995689205800234, "compression_loss": 0.0, "distillation_loss": 0.09845395386219025, "epoch": 6.7, "learning_rate": 2.3360894705686254e-07, "loss": 0.1004, "step": 7054, "task_loss": 0.11757258325815201 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799569863173173, "compression_loss": 0.0, "distillation_loss": 0.020045258104801178, "epoch": 6.7, "learning_rate": 2.3215796015446378e-07, "loss": 0.0187, "step": 7055, "task_loss": 0.006300348788499832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995708043912784, "compression_loss": 0.0, "distillation_loss": 0.06473547220230103, "epoch": 6.7, "learning_rate": 2.3071147244985713e-07, "loss": 0.0787, "step": 7056, "task_loss": 0.2046293467283249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995717442353428, "compression_loss": 0.0, "distillation_loss": 0.08796077966690063, "epoch": 6.7, "learning_rate": 2.292694842057991e-07, "loss": 0.1015, "step": 7057, "task_loss": 0.22374670207500458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995726827063702, "compression_loss": 0.0, "distillation_loss": 0.021141093224287033, "epoch": 6.7, "learning_rate": 2.278319956842384e-07, "loss": 0.0362, "step": 7058, "task_loss": 0.17210184037685394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995736198053641, "compression_loss": 0.0, "distillation_loss": 0.03681035712361336, "epoch": 6.7, "learning_rate": 2.263990071462968e-07, "loss": 0.0371, "step": 7059, "task_loss": 0.039506033062934875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995745555333283, "compression_loss": 0.0, "distillation_loss": 0.03608547896146774, "epoch": 6.7, "learning_rate": 2.2497051885228827e-07, "loss": 0.0381, "step": 7060, "task_loss": 0.05643710866570473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995754898912665, "compression_loss": 0.0, "distillation_loss": 0.015874043107032776, "epoch": 6.71, "learning_rate": 2.2354653106170244e-07, "loss": 0.015, "step": 7061, "task_loss": 0.006745463237166405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995764228801822, "compression_loss": 0.0, "distillation_loss": 0.03536041080951691, "epoch": 6.71, "learning_rate": 2.2212704403321572e-07, "loss": 0.0436, "step": 7062, "task_loss": 0.11737397313117981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995773545010793, "compression_loss": 0.0, "distillation_loss": 0.20444419980049133, "epoch": 6.71, "learning_rate": 2.2071205802468299e-07, "loss": 0.194, "step": 7063, "task_loss": 0.10004756599664688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995782847549614, "compression_loss": 0.0, "distillation_loss": 0.04743964970111847, "epoch": 6.71, "learning_rate": 2.1930157329314026e-07, "loss": 0.0468, "step": 7064, "task_loss": 0.041466888040304184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995792136428321, "compression_loss": 0.0, "distillation_loss": 0.021672185510396957, "epoch": 6.71, "learning_rate": 2.178955900948132e-07, "loss": 0.0216, "step": 7065, "task_loss": 0.0210067518055439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995801411656952, "compression_loss": 0.0, "distillation_loss": 0.04924159497022629, "epoch": 6.71, "learning_rate": 2.1649410868510577e-07, "loss": 0.05, "step": 7066, "task_loss": 0.056573107838630676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995810673245544, "compression_loss": 0.0, "distillation_loss": 0.03908860683441162, "epoch": 6.71, "learning_rate": 2.1509712931860327e-07, "loss": 0.0441, "step": 7067, "task_loss": 0.08947930485010147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995819921204131, "compression_loss": 0.0, "distillation_loss": 0.140781432390213, "epoch": 6.71, "learning_rate": 2.1370465224906655e-07, "loss": 0.1488, "step": 7068, "task_loss": 0.2205652892589569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995829155542754, "compression_loss": 0.0, "distillation_loss": 0.06154758110642433, "epoch": 6.71, "learning_rate": 2.1231667772945718e-07, "loss": 0.0578, "step": 7069, "task_loss": 0.02428608387708664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995838376271447, "compression_loss": 0.0, "distillation_loss": 0.08523669838905334, "epoch": 6.71, "learning_rate": 2.1093320601190124e-07, "loss": 0.0834, "step": 7070, "task_loss": 0.06637626141309738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995847583400248, "compression_loss": 0.0, "distillation_loss": 0.058525826781988144, "epoch": 6.72, "learning_rate": 2.0955423734771162e-07, "loss": 0.0756, "step": 7071, "task_loss": 0.2295752465724945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995856776939194, "compression_loss": 0.0, "distillation_loss": 0.03819255903363228, "epoch": 6.72, "learning_rate": 2.081797719873879e-07, "loss": 0.0503, "step": 7072, "task_loss": 0.15918831527233124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799586595689832, "compression_loss": 0.0, "distillation_loss": 0.12596416473388672, "epoch": 6.72, "learning_rate": 2.0680981018060819e-07, "loss": 0.1229, "step": 7073, "task_loss": 0.09560129046440125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995875123287665, "compression_loss": 0.0, "distillation_loss": 0.026970701292157173, "epoch": 6.72, "learning_rate": 2.054443521762317e-07, "loss": 0.038, "step": 7074, "task_loss": 0.13767385482788086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995884276117264, "compression_loss": 0.0, "distillation_loss": 0.017990119755268097, "epoch": 6.72, "learning_rate": 2.040833982223017e-07, "loss": 0.0198, "step": 7075, "task_loss": 0.036511633545160294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995893415397156, "compression_loss": 0.0, "distillation_loss": 0.040901899337768555, "epoch": 6.72, "learning_rate": 2.0272694856603991e-07, "loss": 0.0423, "step": 7076, "task_loss": 0.05451485887169838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995902541137376, "compression_loss": 0.0, "distillation_loss": 0.04028903320431709, "epoch": 6.72, "learning_rate": 2.013750034538492e-07, "loss": 0.0384, "step": 7077, "task_loss": 0.021871501579880714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995911653347961, "compression_loss": 0.0, "distillation_loss": 0.020981252193450928, "epoch": 6.72, "learning_rate": 2.0002756313132475e-07, "loss": 0.0195, "step": 7078, "task_loss": 0.006563212722539902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995920752038949, "compression_loss": 0.0, "distillation_loss": 0.0342644639313221, "epoch": 6.72, "learning_rate": 1.9868462784322917e-07, "loss": 0.036, "step": 7079, "task_loss": 0.05136824771761894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995929837220376, "compression_loss": 0.0, "distillation_loss": 0.1288357377052307, "epoch": 6.72, "learning_rate": 1.9734619783351727e-07, "loss": 0.1282, "step": 7080, "task_loss": 0.12262624502182007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995938908902278, "compression_loss": 0.0, "distillation_loss": 0.07863984256982803, "epoch": 6.72, "learning_rate": 1.9601227334531956e-07, "loss": 0.0845, "step": 7081, "task_loss": 0.13708055019378662 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995947967094693, "compression_loss": 0.0, "distillation_loss": 0.04373558238148689, "epoch": 6.73, "learning_rate": 1.946828546209478e-07, "loss": 0.0465, "step": 7082, "task_loss": 0.07146552950143814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995957011807658, "compression_loss": 0.0, "distillation_loss": 0.07881873846054077, "epoch": 6.73, "learning_rate": 1.9335794190190327e-07, "loss": 0.0787, "step": 7083, "task_loss": 0.0776781439781189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995966043051209, "compression_loss": 0.0, "distillation_loss": 0.023989124223589897, "epoch": 6.73, "learning_rate": 1.920375354288545e-07, "loss": 0.0223, "step": 7084, "task_loss": 0.006650885567069054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995975060835383, "compression_loss": 0.0, "distillation_loss": 0.01962425746023655, "epoch": 6.73, "learning_rate": 1.9072163544166244e-07, "loss": 0.0248, "step": 7085, "task_loss": 0.07158499956130981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995984065170217, "compression_loss": 0.0, "distillation_loss": 0.029367784038186073, "epoch": 6.73, "learning_rate": 1.89410242179372e-07, "loss": 0.0391, "step": 7086, "task_loss": 0.12671338021755219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7995993056065748, "compression_loss": 0.0, "distillation_loss": 0.08097614347934723, "epoch": 6.73, "learning_rate": 1.881033558802009e-07, "loss": 0.0883, "step": 7087, "task_loss": 0.15451526641845703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996002033532013, "compression_loss": 0.0, "distillation_loss": 0.09910960495471954, "epoch": 6.73, "learning_rate": 1.8680097678154817e-07, "loss": 0.1072, "step": 7088, "task_loss": 0.17986391484737396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996010997579047, "compression_loss": 0.0, "distillation_loss": 0.03139527514576912, "epoch": 6.73, "learning_rate": 1.8550310511999958e-07, "loss": 0.0354, "step": 7089, "task_loss": 0.0717248022556305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996019948216889, "compression_loss": 0.0, "distillation_loss": 0.020029647275805473, "epoch": 6.73, "learning_rate": 1.8420974113131927e-07, "loss": 0.0187, "step": 7090, "task_loss": 0.006666556000709534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996028885455575, "compression_loss": 0.0, "distillation_loss": 0.03404942527413368, "epoch": 6.73, "learning_rate": 1.8292088505045546e-07, "loss": 0.0398, "step": 7091, "task_loss": 0.09123058617115021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996037809305142, "compression_loss": 0.0, "distillation_loss": 0.03140270709991455, "epoch": 6.74, "learning_rate": 1.8163653711153194e-07, "loss": 0.0426, "step": 7092, "task_loss": 0.14372628927230835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996046719775627, "compression_loss": 0.0, "distillation_loss": 0.027259420603513718, "epoch": 6.74, "learning_rate": 1.8035669754785934e-07, "loss": 0.0421, "step": 7093, "task_loss": 0.17557671666145325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996055616877066, "compression_loss": 0.0, "distillation_loss": 0.023833533748984337, "epoch": 6.74, "learning_rate": 1.7908136659192387e-07, "loss": 0.0223, "step": 7094, "task_loss": 0.00828717090189457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996064500619496, "compression_loss": 0.0, "distillation_loss": 0.11141105741262436, "epoch": 6.74, "learning_rate": 1.7781054447539858e-07, "loss": 0.1124, "step": 7095, "task_loss": 0.12159767001867294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996073371012955, "compression_loss": 0.0, "distillation_loss": 0.0840909332036972, "epoch": 6.74, "learning_rate": 1.7654423142913213e-07, "loss": 0.0868, "step": 7096, "task_loss": 0.1110580712556839 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996082228067479, "compression_loss": 0.0, "distillation_loss": 0.018214348703622818, "epoch": 6.74, "learning_rate": 1.7528242768315717e-07, "loss": 0.0171, "step": 7097, "task_loss": 0.007498534396290779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996091071793104, "compression_loss": 0.0, "distillation_loss": 0.02279078960418701, "epoch": 6.74, "learning_rate": 1.7402513346668758e-07, "loss": 0.021, "step": 7098, "task_loss": 0.0046605877578258514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996099902199868, "compression_loss": 0.0, "distillation_loss": 0.024694666266441345, "epoch": 6.74, "learning_rate": 1.727723490081129e-07, "loss": 0.026, "step": 7099, "task_loss": 0.03818400576710701 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996108719297808, "compression_loss": 0.0, "distillation_loss": 0.04019855335354805, "epoch": 6.74, "learning_rate": 1.7152407453501219e-07, "loss": 0.0374, "step": 7100, "task_loss": 0.012513427063822746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799611752309696, "compression_loss": 0.0, "distillation_loss": 0.028096789494156837, "epoch": 6.74, "learning_rate": 1.7028031027413737e-07, "loss": 0.0337, "step": 7101, "task_loss": 0.08449839055538177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799612631360736, "compression_loss": 0.0, "distillation_loss": 0.05807451158761978, "epoch": 6.74, "learning_rate": 1.6904105645142444e-07, "loss": 0.0659, "step": 7102, "task_loss": 0.13655823469161987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996135090839047, "compression_loss": 0.0, "distillation_loss": 0.05028388276696205, "epoch": 6.75, "learning_rate": 1.6780631329199326e-07, "loss": 0.047, "step": 7103, "task_loss": 0.01732676476240158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996143854802057, "compression_loss": 0.0, "distillation_loss": 0.01434339303523302, "epoch": 6.75, "learning_rate": 1.6657608102013667e-07, "loss": 0.0217, "step": 7104, "task_loss": 0.08795058727264404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996152605506426, "compression_loss": 0.0, "distillation_loss": 0.026891184970736504, "epoch": 6.75, "learning_rate": 1.65350359859337e-07, "loss": 0.0254, "step": 7105, "task_loss": 0.011834867298603058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996161342962191, "compression_loss": 0.0, "distillation_loss": 0.058868955820798874, "epoch": 6.75, "learning_rate": 1.6412915003224672e-07, "loss": 0.056, "step": 7106, "task_loss": 0.030042661353945732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799617006717939, "compression_loss": 0.0, "distillation_loss": 0.023804733529686928, "epoch": 6.75, "learning_rate": 1.629124517607078e-07, "loss": 0.022, "step": 7107, "task_loss": 0.006073350086808205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996178778168059, "compression_loss": 0.0, "distillation_loss": 0.025645965710282326, "epoch": 6.75, "learning_rate": 1.6170026526574066e-07, "loss": 0.0248, "step": 7108, "task_loss": 0.0175074003636837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996187475938235, "compression_loss": 0.0, "distillation_loss": 0.031395357102155685, "epoch": 6.75, "learning_rate": 1.6049259076753864e-07, "loss": 0.0353, "step": 7109, "task_loss": 0.07032643258571625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996196160499953, "compression_loss": 0.0, "distillation_loss": 0.025923658162355423, "epoch": 6.75, "learning_rate": 1.592894284854901e-07, "loss": 0.024, "step": 7110, "task_loss": 0.006603613495826721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996204831863253, "compression_loss": 0.0, "distillation_loss": 0.11871407926082611, "epoch": 6.75, "learning_rate": 1.5809077863814803e-07, "loss": 0.117, "step": 7111, "task_loss": 0.10196692496538162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799621349003817, "compression_loss": 0.0, "distillation_loss": 0.06971892714500427, "epoch": 6.75, "learning_rate": 1.5689664144325766e-07, "loss": 0.0639, "step": 7112, "task_loss": 0.011437831446528435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996222135034742, "compression_loss": 0.0, "distillation_loss": 0.0784948393702507, "epoch": 6.75, "learning_rate": 1.5570701711773717e-07, "loss": 0.0759, "step": 7113, "task_loss": 0.05259266868233681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996230766863003, "compression_loss": 0.0, "distillation_loss": 0.03151058405637741, "epoch": 6.76, "learning_rate": 1.5452190587768867e-07, "loss": 0.0486, "step": 7114, "task_loss": 0.20285047590732574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996239385532993, "compression_loss": 0.0, "distillation_loss": 0.09722236543893814, "epoch": 6.76, "learning_rate": 1.5334130793839275e-07, "loss": 0.1164, "step": 7115, "task_loss": 0.2885318398475647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996247991054748, "compression_loss": 0.0, "distillation_loss": 0.07263641804456711, "epoch": 6.76, "learning_rate": 1.5216522351431118e-07, "loss": 0.0823, "step": 7116, "task_loss": 0.16909703612327576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996256583438304, "compression_loss": 0.0, "distillation_loss": 0.04487297311425209, "epoch": 6.76, "learning_rate": 1.5099365281908694e-07, "loss": 0.0531, "step": 7117, "task_loss": 0.12665140628814697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996265162693698, "compression_loss": 0.0, "distillation_loss": 0.013267583213746548, "epoch": 6.76, "learning_rate": 1.4982659606553872e-07, "loss": 0.0206, "step": 7118, "task_loss": 0.08665129542350769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996273728830966, "compression_loss": 0.0, "distillation_loss": 0.012607241049408913, "epoch": 6.76, "learning_rate": 1.4866405346566914e-07, "loss": 0.0118, "step": 7119, "task_loss": 0.004675716161727905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996282281860148, "compression_loss": 0.0, "distillation_loss": 0.06657108664512634, "epoch": 6.76, "learning_rate": 1.4750602523065648e-07, "loss": 0.0707, "step": 7120, "task_loss": 0.10777979344129562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996290821791276, "compression_loss": 0.0, "distillation_loss": 0.028429970145225525, "epoch": 6.76, "learning_rate": 1.463525115708686e-07, "loss": 0.0311, "step": 7121, "task_loss": 0.05511371046304703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996299348634391, "compression_loss": 0.0, "distillation_loss": 0.039049431681632996, "epoch": 6.76, "learning_rate": 1.4520351269584343e-07, "loss": 0.0477, "step": 7122, "task_loss": 0.12538807094097137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996307862399529, "compression_loss": 0.0, "distillation_loss": 0.036447908729314804, "epoch": 6.76, "learning_rate": 1.4405902881430288e-07, "loss": 0.0352, "step": 7123, "task_loss": 0.023651236668229103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996316363096726, "compression_loss": 0.0, "distillation_loss": 0.027506425976753235, "epoch": 6.77, "learning_rate": 1.4291906013414457e-07, "loss": 0.0269, "step": 7124, "task_loss": 0.02168971858918667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996324850736019, "compression_loss": 0.0, "distillation_loss": 0.02820620872080326, "epoch": 6.77, "learning_rate": 1.417836068624556e-07, "loss": 0.0268, "step": 7125, "task_loss": 0.014426644891500473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996333325327444, "compression_loss": 0.0, "distillation_loss": 0.04337786138057709, "epoch": 6.77, "learning_rate": 1.406526692054877e-07, "loss": 0.0525, "step": 7126, "task_loss": 0.13505274057388306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996341786881038, "compression_loss": 0.0, "distillation_loss": 0.031544819474220276, "epoch": 6.77, "learning_rate": 1.3952624736869036e-07, "loss": 0.0305, "step": 7127, "task_loss": 0.0206135306507349 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996350235406839, "compression_loss": 0.0, "distillation_loss": 0.027235832065343857, "epoch": 6.77, "learning_rate": 1.3840434155668047e-07, "loss": 0.0423, "step": 7128, "task_loss": 0.17793741822242737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996358670914885, "compression_loss": 0.0, "distillation_loss": 0.026448873803019524, "epoch": 6.77, "learning_rate": 1.3728695197325336e-07, "loss": 0.0328, "step": 7129, "task_loss": 0.08974019438028336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996367093415209, "compression_loss": 0.0, "distillation_loss": 0.03855022042989731, "epoch": 6.77, "learning_rate": 1.361740788213911e-07, "loss": 0.0482, "step": 7130, "task_loss": 0.13530199229717255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996375502917851, "compression_loss": 0.0, "distillation_loss": 0.02457747608423233, "epoch": 6.77, "learning_rate": 1.3506572230325698e-07, "loss": 0.0226, "step": 7131, "task_loss": 0.004712225869297981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996383899432846, "compression_loss": 0.0, "distillation_loss": 0.09410484880208969, "epoch": 6.77, "learning_rate": 1.3396188262018438e-07, "loss": 0.1019, "step": 7132, "task_loss": 0.17175397276878357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996392282970232, "compression_loss": 0.0, "distillation_loss": 0.05483167618513107, "epoch": 6.77, "learning_rate": 1.3286255997268793e-07, "loss": 0.0599, "step": 7133, "task_loss": 0.10567161440849304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996400653540044, "compression_loss": 0.0, "distillation_loss": 0.03218763321638107, "epoch": 6.77, "learning_rate": 1.3176775456047175e-07, "loss": 0.0324, "step": 7134, "task_loss": 0.034330014139413834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996409011152322, "compression_loss": 0.0, "distillation_loss": 0.038693610578775406, "epoch": 6.78, "learning_rate": 1.3067746658241008e-07, "loss": 0.0474, "step": 7135, "task_loss": 0.12591272592544556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79964173558171, "compression_loss": 0.0, "distillation_loss": 0.018780706450343132, "epoch": 6.78, "learning_rate": 1.2959169623655843e-07, "loss": 0.0281, "step": 7136, "task_loss": 0.11212365329265594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996425687544417, "compression_loss": 0.0, "distillation_loss": 0.021541312336921692, "epoch": 6.78, "learning_rate": 1.2851044372015064e-07, "loss": 0.0275, "step": 7137, "task_loss": 0.08120422065258026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996434006344307, "compression_loss": 0.0, "distillation_loss": 0.018733816221356392, "epoch": 6.78, "learning_rate": 1.2743370922960462e-07, "loss": 0.0175, "step": 7138, "task_loss": 0.006464965641498566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799644231222681, "compression_loss": 0.0, "distillation_loss": 0.04339815676212311, "epoch": 6.78, "learning_rate": 1.263614929605139e-07, "loss": 0.0406, "step": 7139, "task_loss": 0.015392309054732323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799645060520196, "compression_loss": 0.0, "distillation_loss": 0.01880636438727379, "epoch": 6.78, "learning_rate": 1.252937951076477e-07, "loss": 0.0175, "step": 7140, "task_loss": 0.005763756111264229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996458885279796, "compression_loss": 0.0, "distillation_loss": 0.0439765602350235, "epoch": 6.78, "learning_rate": 1.2423061586496477e-07, "loss": 0.0415, "step": 7141, "task_loss": 0.019159667193889618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996467152470353, "compression_loss": 0.0, "distillation_loss": 0.036824434995651245, "epoch": 6.78, "learning_rate": 1.231719554255939e-07, "loss": 0.0418, "step": 7142, "task_loss": 0.08699777722358704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996475406783671, "compression_loss": 0.0, "distillation_loss": 0.014559917151927948, "epoch": 6.78, "learning_rate": 1.2211781398184242e-07, "loss": 0.0138, "step": 7143, "task_loss": 0.0064619556069374084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996483648229782, "compression_loss": 0.0, "distillation_loss": 0.042142875492572784, "epoch": 6.78, "learning_rate": 1.2106819172520434e-07, "loss": 0.0426, "step": 7144, "task_loss": 0.046216074377298355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996491876818727, "compression_loss": 0.0, "distillation_loss": 0.023237306624650955, "epoch": 6.79, "learning_rate": 1.200230888463466e-07, "loss": 0.0304, "step": 7145, "task_loss": 0.0949011892080307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799650009256054, "compression_loss": 0.0, "distillation_loss": 0.018296122550964355, "epoch": 6.79, "learning_rate": 1.1898250553512014e-07, "loss": 0.024, "step": 7146, "task_loss": 0.07504522800445557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799650829546526, "compression_loss": 0.0, "distillation_loss": 0.02411121129989624, "epoch": 6.79, "learning_rate": 1.1794644198054871e-07, "loss": 0.0222, "step": 7147, "task_loss": 0.0048682671040296555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996516485542922, "compression_loss": 0.0, "distillation_loss": 0.07223325222730637, "epoch": 6.79, "learning_rate": 1.1691489837083735e-07, "loss": 0.0702, "step": 7148, "task_loss": 0.052256107330322266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996524662803564, "compression_loss": 0.0, "distillation_loss": 0.0212749931961298, "epoch": 6.79, "learning_rate": 1.1588787489337505e-07, "loss": 0.0325, "step": 7149, "task_loss": 0.13337501883506775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996532827257222, "compression_loss": 0.0, "distillation_loss": 0.014818870462477207, "epoch": 6.79, "learning_rate": 1.1486537173472367e-07, "loss": 0.014, "step": 7150, "task_loss": 0.006277905777096748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996540978913934, "compression_loss": 0.0, "distillation_loss": 0.05914817005395889, "epoch": 6.79, "learning_rate": 1.1384738908062631e-07, "loss": 0.0666, "step": 7151, "task_loss": 0.1340140402317047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996549117783737, "compression_loss": 0.0, "distillation_loss": 0.044574763625860214, "epoch": 6.79, "learning_rate": 1.1283392711600171e-07, "loss": 0.0479, "step": 7152, "task_loss": 0.07772877812385559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996557243876665, "compression_loss": 0.0, "distillation_loss": 0.0406227707862854, "epoch": 6.79, "learning_rate": 1.1182498602495539e-07, "loss": 0.0471, "step": 7153, "task_loss": 0.10571486502885818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996565357202758, "compression_loss": 0.0, "distillation_loss": 0.08192355930805206, "epoch": 6.79, "learning_rate": 1.1082056599076018e-07, "loss": 0.0774, "step": 7154, "task_loss": 0.03656609356403351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996573457772052, "compression_loss": 0.0, "distillation_loss": 0.0265779010951519, "epoch": 6.79, "learning_rate": 1.0982066719587569e-07, "loss": 0.0401, "step": 7155, "task_loss": 0.16203996539115906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996581545594582, "compression_loss": 0.0, "distillation_loss": 0.017745571210980415, "epoch": 6.8, "learning_rate": 1.0882528982194273e-07, "loss": 0.0164, "step": 7156, "task_loss": 0.0038829054683446884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996589620680388, "compression_loss": 0.0, "distillation_loss": 0.022481059655547142, "epoch": 6.8, "learning_rate": 1.0783443404976946e-07, "loss": 0.0208, "step": 7157, "task_loss": 0.005929671227931976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996597683039504, "compression_loss": 0.0, "distillation_loss": 0.02317318134009838, "epoch": 6.8, "learning_rate": 1.0684810005935631e-07, "loss": 0.0214, "step": 7158, "task_loss": 0.005201876163482666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996605732681967, "compression_loss": 0.0, "distillation_loss": 0.05156789720058441, "epoch": 6.8, "learning_rate": 1.0586628802987108e-07, "loss": 0.0496, "step": 7159, "task_loss": 0.03225575387477875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996613769617816, "compression_loss": 0.0, "distillation_loss": 0.02196839265525341, "epoch": 6.8, "learning_rate": 1.0488899813966835e-07, "loss": 0.0267, "step": 7160, "task_loss": 0.06964989006519318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996621793857086, "compression_loss": 0.0, "distillation_loss": 0.06468576192855835, "epoch": 6.8, "learning_rate": 1.0391623056627275e-07, "loss": 0.0656, "step": 7161, "task_loss": 0.07367479801177979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996629805409815, "compression_loss": 0.0, "distillation_loss": 0.04920611530542374, "epoch": 6.8, "learning_rate": 1.0294798548639573e-07, "loss": 0.0528, "step": 7162, "task_loss": 0.08470079302787781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996637804286039, "compression_loss": 0.0, "distillation_loss": 0.032296232879161835, "epoch": 6.8, "learning_rate": 1.0198426307592157e-07, "loss": 0.0307, "step": 7163, "task_loss": 0.01606859639286995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996645790495794, "compression_loss": 0.0, "distillation_loss": 0.04291418194770813, "epoch": 6.8, "learning_rate": 1.010250635099158e-07, "loss": 0.0641, "step": 7164, "task_loss": 0.25430774688720703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996653764049119, "compression_loss": 0.0, "distillation_loss": 0.02546636387705803, "epoch": 6.8, "learning_rate": 1.0007038696262516e-07, "loss": 0.0302, "step": 7165, "task_loss": 0.07231911271810532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996661724956049, "compression_loss": 0.0, "distillation_loss": 0.028939777985215187, "epoch": 6.81, "learning_rate": 9.912023360746647e-08, "loss": 0.0268, "step": 7166, "task_loss": 0.007344096899032593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996669673226621, "compression_loss": 0.0, "distillation_loss": 0.11444491147994995, "epoch": 6.81, "learning_rate": 9.817460361704056e-08, "loss": 0.1122, "step": 7167, "task_loss": 0.0920182541012764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996677608870872, "compression_loss": 0.0, "distillation_loss": 0.10061150789260864, "epoch": 6.81, "learning_rate": 9.723349716312668e-08, "loss": 0.0963, "step": 7168, "task_loss": 0.05738703906536102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996685531898841, "compression_loss": 0.0, "distillation_loss": 0.08503048866987228, "epoch": 6.81, "learning_rate": 9.629691441667976e-08, "loss": 0.093, "step": 7169, "task_loss": 0.1647735834121704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996693442320562, "compression_loss": 0.0, "distillation_loss": 0.028403718024492264, "epoch": 6.81, "learning_rate": 9.53648555478387e-08, "loss": 0.0262, "step": 7170, "task_loss": 0.006750069558620453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996701340146072, "compression_loss": 0.0, "distillation_loss": 0.01994595304131508, "epoch": 6.81, "learning_rate": 9.443732072591249e-08, "loss": 0.0188, "step": 7171, "task_loss": 0.008648447692394257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996709225385409, "compression_loss": 0.0, "distillation_loss": 0.06424511969089508, "epoch": 6.81, "learning_rate": 9.351431011939138e-08, "loss": 0.0761, "step": 7172, "task_loss": 0.18302549421787262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799671709804861, "compression_loss": 0.0, "distillation_loss": 0.020225565880537033, "epoch": 6.81, "learning_rate": 9.259582389594956e-08, "loss": 0.0185, "step": 7173, "task_loss": 0.002962857484817505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799672495814571, "compression_loss": 0.0, "distillation_loss": 0.07207255065441132, "epoch": 6.81, "learning_rate": 9.168186222243136e-08, "loss": 0.0706, "step": 7174, "task_loss": 0.05724616348743439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996732805686748, "compression_loss": 0.0, "distillation_loss": 0.02577575296163559, "epoch": 6.81, "learning_rate": 9.077242526485952e-08, "loss": 0.027, "step": 7175, "task_loss": 0.03761523589491844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799674064068176, "compression_loss": 0.0, "distillation_loss": 0.03172352910041809, "epoch": 6.81, "learning_rate": 8.986751318844355e-08, "loss": 0.0399, "step": 7176, "task_loss": 0.11340309679508209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996748463140781, "compression_loss": 0.0, "distillation_loss": 0.03157733753323555, "epoch": 6.82, "learning_rate": 8.896712615756308e-08, "loss": 0.0311, "step": 7177, "task_loss": 0.02710825763642788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996756273073852, "compression_loss": 0.0, "distillation_loss": 0.01715652272105217, "epoch": 6.82, "learning_rate": 8.807126433577617e-08, "loss": 0.0165, "step": 7178, "task_loss": 0.010277284309267998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996764070491006, "compression_loss": 0.0, "distillation_loss": 0.03965865075588226, "epoch": 6.82, "learning_rate": 8.717992788582207e-08, "loss": 0.0431, "step": 7179, "task_loss": 0.07392223924398422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799677185540228, "compression_loss": 0.0, "distillation_loss": 0.04710112512111664, "epoch": 6.82, "learning_rate": 8.629311696961295e-08, "loss": 0.0486, "step": 7180, "task_loss": 0.06256051361560822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996779627817714, "compression_loss": 0.0, "distillation_loss": 0.04641801118850708, "epoch": 6.82, "learning_rate": 8.541083174824771e-08, "loss": 0.0597, "step": 7181, "task_loss": 0.17936520278453827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996787387747342, "compression_loss": 0.0, "distillation_loss": 0.017239782959222794, "epoch": 6.82, "learning_rate": 8.453307238199259e-08, "loss": 0.0159, "step": 7182, "task_loss": 0.003765510395169258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996795135201201, "compression_loss": 0.0, "distillation_loss": 0.13984356820583344, "epoch": 6.82, "learning_rate": 8.365983903030061e-08, "loss": 0.139, "step": 7183, "task_loss": 0.13107913732528687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996802870189329, "compression_loss": 0.0, "distillation_loss": 0.07437920570373535, "epoch": 6.82, "learning_rate": 8.279113185179488e-08, "loss": 0.0826, "step": 7184, "task_loss": 0.15642966330051422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996810592721761, "compression_loss": 0.0, "distillation_loss": 0.036259036511182785, "epoch": 6.82, "learning_rate": 8.192695100428527e-08, "loss": 0.0332, "step": 7185, "task_loss": 0.005903052166104317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996818302808537, "compression_loss": 0.0, "distillation_loss": 0.018497422337532043, "epoch": 6.82, "learning_rate": 8.106729664475176e-08, "loss": 0.0241, "step": 7186, "task_loss": 0.07415502518415451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799682600045969, "compression_loss": 0.0, "distillation_loss": 0.03047127276659012, "epoch": 6.83, "learning_rate": 8.021216892935279e-08, "loss": 0.0313, "step": 7187, "task_loss": 0.038899097591638565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799683368568526, "compression_loss": 0.0, "distillation_loss": 0.053710635751485825, "epoch": 6.83, "learning_rate": 7.936156801342797e-08, "loss": 0.0536, "step": 7188, "task_loss": 0.052580974996089935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996841358495282, "compression_loss": 0.0, "distillation_loss": 0.06118376553058624, "epoch": 6.83, "learning_rate": 7.851549405149539e-08, "loss": 0.0601, "step": 7189, "task_loss": 0.050785936415195465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996849018899793, "compression_loss": 0.0, "distillation_loss": 0.02713852934539318, "epoch": 6.83, "learning_rate": 7.767394719724597e-08, "loss": 0.0425, "step": 7190, "task_loss": 0.18106593191623688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996856666908831, "compression_loss": 0.0, "distillation_loss": 0.06980240345001221, "epoch": 6.83, "learning_rate": 7.683692760355187e-08, "loss": 0.076, "step": 7191, "task_loss": 0.13159391283988953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996864302532432, "compression_loss": 0.0, "distillation_loss": 0.019683901220560074, "epoch": 6.83, "learning_rate": 7.600443542246094e-08, "loss": 0.0184, "step": 7192, "task_loss": 0.00683322548866272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996871925780632, "compression_loss": 0.0, "distillation_loss": 0.02726883627474308, "epoch": 6.83, "learning_rate": 7.51764708051994e-08, "loss": 0.0323, "step": 7193, "task_loss": 0.0772031769156456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996879536663469, "compression_loss": 0.0, "distillation_loss": 0.15833452343940735, "epoch": 6.83, "learning_rate": 7.435303390216919e-08, "loss": 0.1523, "step": 7194, "task_loss": 0.09784331917762756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799688713519098, "compression_loss": 0.0, "distillation_loss": 0.02268981747329235, "epoch": 6.83, "learning_rate": 7.353412486295619e-08, "loss": 0.0208, "step": 7195, "task_loss": 0.004109309986233711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79968947213732, "compression_loss": 0.0, "distillation_loss": 0.024118095636367798, "epoch": 6.83, "learning_rate": 7.271974383631642e-08, "loss": 0.0263, "step": 7196, "task_loss": 0.04595687985420227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996902295220168, "compression_loss": 0.0, "distillation_loss": 0.028797656297683716, "epoch": 6.83, "learning_rate": 7.19098909701843e-08, "loss": 0.0415, "step": 7197, "task_loss": 0.1560361236333847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799690985674192, "compression_loss": 0.0, "distillation_loss": 0.03179541975259781, "epoch": 6.84, "learning_rate": 7.110456641167829e-08, "loss": 0.0305, "step": 7198, "task_loss": 0.018481941893696785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996917405948492, "compression_loss": 0.0, "distillation_loss": 0.01870710775256157, "epoch": 6.84, "learning_rate": 7.030377030708412e-08, "loss": 0.0181, "step": 7199, "task_loss": 0.012378891929984093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996924942849922, "compression_loss": 0.0, "distillation_loss": 0.06044170260429382, "epoch": 6.84, "learning_rate": 6.950750280187435e-08, "loss": 0.0777, "step": 7200, "task_loss": 0.23314093053340912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996932467456246, "compression_loss": 0.0, "distillation_loss": 0.05844536051154137, "epoch": 6.84, "learning_rate": 6.871576404069158e-08, "loss": 0.0756, "step": 7201, "task_loss": 0.23015083372592926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996939979777502, "compression_loss": 0.0, "distillation_loss": 0.01944398134946823, "epoch": 6.84, "learning_rate": 6.792855416736243e-08, "loss": 0.0264, "step": 7202, "task_loss": 0.08910975605249405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996947479823725, "compression_loss": 0.0, "distillation_loss": 0.024285856634378433, "epoch": 6.84, "learning_rate": 6.714587332488364e-08, "loss": 0.0317, "step": 7203, "task_loss": 0.09848549962043762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996954967604953, "compression_loss": 0.0, "distillation_loss": 0.06020985543727875, "epoch": 6.84, "learning_rate": 6.63677216554387e-08, "loss": 0.0604, "step": 7204, "task_loss": 0.062497399747371674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996962443131224, "compression_loss": 0.0, "distillation_loss": 0.03330262005329132, "epoch": 6.84, "learning_rate": 6.559409930037563e-08, "loss": 0.0311, "step": 7205, "task_loss": 0.011487048119306564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996969906412571, "compression_loss": 0.0, "distillation_loss": 0.08118142187595367, "epoch": 6.84, "learning_rate": 6.482500640022926e-08, "loss": 0.0848, "step": 7206, "task_loss": 0.11695171892642975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996977357459035, "compression_loss": 0.0, "distillation_loss": 0.030360179021954536, "epoch": 6.84, "learning_rate": 6.406044309471005e-08, "loss": 0.0329, "step": 7207, "task_loss": 0.05534449219703674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996984796280651, "compression_loss": 0.0, "distillation_loss": 0.08223249763250351, "epoch": 6.85, "learning_rate": 6.406044309471005e-08, "loss": 0.0893, "step": 7208, "task_loss": 0.15278904139995575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996992222887456, "compression_loss": 0.0, "distillation_loss": 0.022313468158245087, "epoch": 6.85, "learning_rate": 6.330040952270688e-08, "loss": 0.0244, "step": 7209, "task_loss": 0.04346153512597084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7996999637289486, "compression_loss": 0.0, "distillation_loss": 0.0184345506131649, "epoch": 6.85, "learning_rate": 6.254490582227879e-08, "loss": 0.017, "step": 7210, "task_loss": 0.004323702305555344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799700703949678, "compression_loss": 0.0, "distillation_loss": 0.020290520042181015, "epoch": 6.85, "learning_rate": 6.179393213066875e-08, "loss": 0.0271, "step": 7211, "task_loss": 0.08879521489143372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997014429519371, "compression_loss": 0.0, "distillation_loss": 0.020489336922764778, "epoch": 6.85, "learning_rate": 6.104748858429266e-08, "loss": 0.0298, "step": 7212, "task_loss": 0.11318296194076538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79970218073673, "compression_loss": 0.0, "distillation_loss": 0.05874666944146156, "epoch": 6.85, "learning_rate": 6.030557531875036e-08, "loss": 0.0728, "step": 7213, "task_loss": 0.1989019811153412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997029173050602, "compression_loss": 0.0, "distillation_loss": 0.03400994837284088, "epoch": 6.85, "learning_rate": 5.956819246881185e-08, "loss": 0.0417, "step": 7214, "task_loss": 0.11090053617954254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997036526579313, "compression_loss": 0.0, "distillation_loss": 0.02666945569217205, "epoch": 6.85, "learning_rate": 5.8835340168422734e-08, "loss": 0.027, "step": 7215, "task_loss": 0.029919061809778214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997043867963471, "compression_loss": 0.0, "distillation_loss": 0.032640133053064346, "epoch": 6.85, "learning_rate": 5.8107018550712656e-08, "loss": 0.0301, "step": 7216, "task_loss": 0.0070366039872169495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997051197213113, "compression_loss": 0.0, "distillation_loss": 0.02398163080215454, "epoch": 6.85, "learning_rate": 5.7383227747984105e-08, "loss": 0.0315, "step": 7217, "task_loss": 0.09866875410079956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997058514338276, "compression_loss": 0.0, "distillation_loss": 0.01552260760217905, "epoch": 6.85, "learning_rate": 5.6663967891718015e-08, "loss": 0.0146, "step": 7218, "task_loss": 0.006190884858369827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997065819348994, "compression_loss": 0.0, "distillation_loss": 0.06299366801977158, "epoch": 6.86, "learning_rate": 5.5949239112570997e-08, "loss": 0.0748, "step": 7219, "task_loss": 0.18070363998413086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997073112255307, "compression_loss": 0.0, "distillation_loss": 0.056522756814956665, "epoch": 6.86, "learning_rate": 5.523904154037529e-08, "loss": 0.0572, "step": 7220, "task_loss": 0.06307417154312134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997080393067251, "compression_loss": 0.0, "distillation_loss": 0.02577255666255951, "epoch": 6.86, "learning_rate": 5.453337530414437e-08, "loss": 0.0269, "step": 7221, "task_loss": 0.03711457550525665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997087661794863, "compression_loss": 0.0, "distillation_loss": 0.024436548352241516, "epoch": 6.86, "learning_rate": 5.383224053206459e-08, "loss": 0.0283, "step": 7222, "task_loss": 0.06333892792463303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997094918448179, "compression_loss": 0.0, "distillation_loss": 0.040110617876052856, "epoch": 6.86, "learning_rate": 5.313563735149796e-08, "loss": 0.0419, "step": 7223, "task_loss": 0.057536765933036804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997102163037235, "compression_loss": 0.0, "distillation_loss": 0.026415549218654633, "epoch": 6.86, "learning_rate": 5.2443565888990466e-08, "loss": 0.0339, "step": 7224, "task_loss": 0.10137321054935455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997109395572071, "compression_loss": 0.0, "distillation_loss": 0.05468286946415901, "epoch": 6.86, "learning_rate": 5.1756026270258215e-08, "loss": 0.0534, "step": 7225, "task_loss": 0.0421706885099411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799711661606272, "compression_loss": 0.0, "distillation_loss": 0.04075568914413452, "epoch": 6.86, "learning_rate": 5.107301862019575e-08, "loss": 0.0453, "step": 7226, "task_loss": 0.08584432303905487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997123824519222, "compression_loss": 0.0, "distillation_loss": 0.04388091713190079, "epoch": 6.86, "learning_rate": 5.0394543062873276e-08, "loss": 0.053, "step": 7227, "task_loss": 0.13531340658664703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997131020951613, "compression_loss": 0.0, "distillation_loss": 0.03439214825630188, "epoch": 6.86, "learning_rate": 4.972059972154219e-08, "loss": 0.0332, "step": 7228, "task_loss": 0.021979160606861115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997138205369929, "compression_loss": 0.0, "distillation_loss": 0.034759216010570526, "epoch": 6.87, "learning_rate": 4.905118871862402e-08, "loss": 0.038, "step": 7229, "task_loss": 0.06708045303821564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997145377784206, "compression_loss": 0.0, "distillation_loss": 0.08172931522130966, "epoch": 6.87, "learning_rate": 4.838631017572426e-08, "loss": 0.0904, "step": 7230, "task_loss": 0.16795513033866882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997152538204483, "compression_loss": 0.0, "distillation_loss": 0.027006901800632477, "epoch": 6.87, "learning_rate": 4.772596421361852e-08, "loss": 0.0248, "step": 7231, "task_loss": 0.0044814664870500565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997159686640796, "compression_loss": 0.0, "distillation_loss": 0.053238674998283386, "epoch": 6.87, "learning_rate": 4.7070150952263634e-08, "loss": 0.068, "step": 7232, "task_loss": 0.2006755918264389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997166823103181, "compression_loss": 0.0, "distillation_loss": 0.04943888634443283, "epoch": 6.87, "learning_rate": 4.64188705107893e-08, "loss": 0.0456, "step": 7233, "task_loss": 0.011105941608548164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997173947601676, "compression_loss": 0.0, "distillation_loss": 0.025495588779449463, "epoch": 6.87, "learning_rate": 4.577212300750644e-08, "loss": 0.0365, "step": 7234, "task_loss": 0.1354934424161911 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997181060146317, "compression_loss": 0.0, "distillation_loss": 0.021435732021927834, "epoch": 6.87, "learning_rate": 4.5129908559896075e-08, "loss": 0.0301, "step": 7235, "task_loss": 0.10771320015192032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997188160747141, "compression_loss": 0.0, "distillation_loss": 0.03189963847398758, "epoch": 6.87, "learning_rate": 4.449222728462599e-08, "loss": 0.0375, "step": 7236, "task_loss": 0.08780453354120255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997195249414185, "compression_loss": 0.0, "distillation_loss": 0.08029976487159729, "epoch": 6.87, "learning_rate": 4.3859079297525754e-08, "loss": 0.0884, "step": 7237, "task_loss": 0.1608220934867859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997202326157485, "compression_loss": 0.0, "distillation_loss": 0.07394590228796005, "epoch": 6.87, "learning_rate": 4.323046471361447e-08, "loss": 0.0696, "step": 7238, "task_loss": 0.0303100124001503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799720939098708, "compression_loss": 0.0, "distillation_loss": 0.08067163825035095, "epoch": 6.87, "learning_rate": 4.2606383647084134e-08, "loss": 0.0771, "step": 7239, "task_loss": 0.04478314146399498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997216443913004, "compression_loss": 0.0, "distillation_loss": 0.0285223089158535, "epoch": 6.88, "learning_rate": 4.198683621129962e-08, "loss": 0.0336, "step": 7240, "task_loss": 0.07915632426738739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997223484945296, "compression_loss": 0.0, "distillation_loss": 0.02331477962434292, "epoch": 6.88, "learning_rate": 4.1371822518804224e-08, "loss": 0.0306, "step": 7241, "task_loss": 0.09588571637868881 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997230514093993, "compression_loss": 0.0, "distillation_loss": 0.07636144757270813, "epoch": 6.88, "learning_rate": 4.0761342681319706e-08, "loss": 0.0866, "step": 7242, "task_loss": 0.1786966323852539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997237531369129, "compression_loss": 0.0, "distillation_loss": 0.08760742098093033, "epoch": 6.88, "learning_rate": 4.015539680974345e-08, "loss": 0.0921, "step": 7243, "task_loss": 0.13226494193077087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997244536780743, "compression_loss": 0.0, "distillation_loss": 0.07857417315244675, "epoch": 6.88, "learning_rate": 3.955398501414576e-08, "loss": 0.0795, "step": 7244, "task_loss": 0.08820837736129761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997251530338872, "compression_loss": 0.0, "distillation_loss": 0.024933654814958572, "epoch": 6.88, "learning_rate": 3.895710740378089e-08, "loss": 0.0264, "step": 7245, "task_loss": 0.03946792706847191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997258512053552, "compression_loss": 0.0, "distillation_loss": 0.0409945584833622, "epoch": 6.88, "learning_rate": 3.8364764087067685e-08, "loss": 0.041, "step": 7246, "task_loss": 0.04090442508459091 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799726548193482, "compression_loss": 0.0, "distillation_loss": 0.1333804726600647, "epoch": 6.88, "learning_rate": 3.777695517161173e-08, "loss": 0.1276, "step": 7247, "task_loss": 0.07556381076574326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997272439992713, "compression_loss": 0.0, "distillation_loss": 0.03527265042066574, "epoch": 6.88, "learning_rate": 3.7193680764191496e-08, "loss": 0.0327, "step": 7248, "task_loss": 0.00909445621073246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997279386237268, "compression_loss": 0.0, "distillation_loss": 0.0798153355717659, "epoch": 6.88, "learning_rate": 3.66149409707639e-08, "loss": 0.0749, "step": 7249, "task_loss": 0.031093858182430267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997286320678522, "compression_loss": 0.0, "distillation_loss": 0.04000500962138176, "epoch": 6.89, "learning_rate": 3.604073589645596e-08, "loss": 0.0366, "step": 7250, "task_loss": 0.00630572997033596 }, { "epoch": 6.89, "eval_accuracy": 0.8956422018348624, "eval_loss": 0.4339929223060608, "eval_runtime": 18.2364, "eval_samples_per_second": 47.816, "eval_steps_per_second": 5.977, "step": 7250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997293243326511, "compression_loss": 0.0, "distillation_loss": 0.024513155221939087, "epoch": 6.89, "learning_rate": 3.547106564557312e-08, "loss": 0.0265, "step": 7251, "task_loss": 0.0446399450302124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997300154191272, "compression_loss": 0.0, "distillation_loss": 0.02782837674021721, "epoch": 6.89, "learning_rate": 3.490593032160483e-08, "loss": 0.0319, "step": 7252, "task_loss": 0.06867832690477371 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997307053282843, "compression_loss": 0.0, "distillation_loss": 0.02256028726696968, "epoch": 6.89, "learning_rate": 3.4345330027207856e-08, "loss": 0.0274, "step": 7253, "task_loss": 0.07064933329820633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997313940611258, "compression_loss": 0.0, "distillation_loss": 0.047051578760147095, "epoch": 6.89, "learning_rate": 3.378926486421463e-08, "loss": 0.061, "step": 7254, "task_loss": 0.18665584921836853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997320816186557, "compression_loss": 0.0, "distillation_loss": 0.016445623710751534, "epoch": 6.89, "learning_rate": 3.3237734933641574e-08, "loss": 0.0218, "step": 7255, "task_loss": 0.07008873671293259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997327680018775, "compression_loss": 0.0, "distillation_loss": 0.03820062056183815, "epoch": 6.89, "learning_rate": 3.269074033567798e-08, "loss": 0.0358, "step": 7256, "task_loss": 0.014143818989396095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799733453211795, "compression_loss": 0.0, "distillation_loss": 0.03633254021406174, "epoch": 6.89, "learning_rate": 3.21482811696805e-08, "loss": 0.0364, "step": 7257, "task_loss": 0.036846742033958435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997341372494118, "compression_loss": 0.0, "distillation_loss": 0.03856148198246956, "epoch": 6.89, "learning_rate": 3.161035753419805e-08, "loss": 0.0463, "step": 7258, "task_loss": 0.11632915586233139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997348201157316, "compression_loss": 0.0, "distillation_loss": 0.018683161586523056, "epoch": 6.89, "learning_rate": 3.107696952694139e-08, "loss": 0.0372, "step": 7259, "task_loss": 0.20371021330356598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799735501811758, "compression_loss": 0.0, "distillation_loss": 0.018803803250193596, "epoch": 6.89, "learning_rate": 3.054811724480522e-08, "loss": 0.0181, "step": 7260, "task_loss": 0.0114500243216753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997361823384949, "compression_loss": 0.0, "distillation_loss": 0.014816414564847946, "epoch": 6.9, "learning_rate": 3.0023800783857134e-08, "loss": 0.0141, "step": 7261, "task_loss": 0.0073722414672374725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997368616969457, "compression_loss": 0.0, "distillation_loss": 0.020906083285808563, "epoch": 6.9, "learning_rate": 2.950402023934318e-08, "loss": 0.0235, "step": 7262, "task_loss": 0.046787478029727936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997375398881144, "compression_loss": 0.0, "distillation_loss": 0.045058127492666245, "epoch": 6.9, "learning_rate": 2.898877570568226e-08, "loss": 0.0595, "step": 7263, "task_loss": 0.1898675560951233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997382169130043, "compression_loss": 0.0, "distillation_loss": 0.050636500120162964, "epoch": 6.9, "learning_rate": 2.8478067276471733e-08, "loss": 0.0479, "step": 7264, "task_loss": 0.02349741943180561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997388927726194, "compression_loss": 0.0, "distillation_loss": 0.049480557441711426, "epoch": 6.9, "learning_rate": 2.7971895044487385e-08, "loss": 0.0677, "step": 7265, "task_loss": 0.23184102773666382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997395674679633, "compression_loss": 0.0, "distillation_loss": 0.10401052981615067, "epoch": 6.9, "learning_rate": 2.7470259101672336e-08, "loss": 0.0976, "step": 7266, "task_loss": 0.04024471715092659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997402410000396, "compression_loss": 0.0, "distillation_loss": 0.018137693405151367, "epoch": 6.9, "learning_rate": 2.6973159539153693e-08, "loss": 0.0169, "step": 7267, "task_loss": 0.006021425127983093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799740913369852, "compression_loss": 0.0, "distillation_loss": 0.024599701166152954, "epoch": 6.9, "learning_rate": 2.648059644723144e-08, "loss": 0.0234, "step": 7268, "task_loss": 0.012996546924114227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997415845784043, "compression_loss": 0.0, "distillation_loss": 0.030850766226649284, "epoch": 6.9, "learning_rate": 2.5992569915384014e-08, "loss": 0.051, "step": 7269, "task_loss": 0.2318606823682785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997422546267, "compression_loss": 0.0, "distillation_loss": 0.16657190024852753, "epoch": 6.9, "learning_rate": 2.550908003226271e-08, "loss": 0.1601, "step": 7270, "task_loss": 0.10178233683109283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997429235157429, "compression_loss": 0.0, "distillation_loss": 0.031046094372868538, "epoch": 6.91, "learning_rate": 2.5030126885694506e-08, "loss": 0.0305, "step": 7271, "task_loss": 0.02560516819357872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997435912465368, "compression_loss": 0.0, "distillation_loss": 0.018669994547963142, "epoch": 6.91, "learning_rate": 2.4555710562684796e-08, "loss": 0.0276, "step": 7272, "task_loss": 0.10760626196861267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997442578200852, "compression_loss": 0.0, "distillation_loss": 0.04650671035051346, "epoch": 6.91, "learning_rate": 2.408583114941465e-08, "loss": 0.0566, "step": 7273, "task_loss": 0.14748124778270721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997449232373918, "compression_loss": 0.0, "distillation_loss": 0.0697929635643959, "epoch": 6.91, "learning_rate": 2.3620488731235234e-08, "loss": 0.0814, "step": 7274, "task_loss": 0.18626686930656433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997455874994602, "compression_loss": 0.0, "distillation_loss": 0.03916598856449127, "epoch": 6.91, "learning_rate": 2.3159683392684483e-08, "loss": 0.0364, "step": 7275, "task_loss": 0.011343579739332199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997462506072943, "compression_loss": 0.0, "distillation_loss": 0.05003172531723976, "epoch": 6.91, "learning_rate": 2.2703415217464885e-08, "loss": 0.0553, "step": 7276, "task_loss": 0.10285012423992157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997469125618977, "compression_loss": 0.0, "distillation_loss": 0.05633166432380676, "epoch": 6.91, "learning_rate": 2.2251684288462915e-08, "loss": 0.0563, "step": 7277, "task_loss": 0.055823661386966705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799747573364274, "compression_loss": 0.0, "distillation_loss": 0.02220912277698517, "epoch": 6.91, "learning_rate": 2.180449068773793e-08, "loss": 0.0213, "step": 7278, "task_loss": 0.013472869992256165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799748233015427, "compression_loss": 0.0, "distillation_loss": 0.03192361444234848, "epoch": 6.91, "learning_rate": 2.136183449652218e-08, "loss": 0.0313, "step": 7279, "task_loss": 0.025719383731484413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997488915163603, "compression_loss": 0.0, "distillation_loss": 0.08539696782827377, "epoch": 6.91, "learning_rate": 2.0923715795229115e-08, "loss": 0.0879, "step": 7280, "task_loss": 0.11043383926153183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997495488680776, "compression_loss": 0.0, "distillation_loss": 0.050244055688381195, "epoch": 6.91, "learning_rate": 2.0490134663442295e-08, "loss": 0.0595, "step": 7281, "task_loss": 0.14261166751384735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997502050715826, "compression_loss": 0.0, "distillation_loss": 0.020430155098438263, "epoch": 6.92, "learning_rate": 2.006109117992372e-08, "loss": 0.0187, "step": 7282, "task_loss": 0.003576911985874176 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997508601278789, "compression_loss": 0.0, "distillation_loss": 0.07387155294418335, "epoch": 6.92, "learning_rate": 1.9636585422616593e-08, "loss": 0.0831, "step": 7283, "task_loss": 0.16649088263511658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997515140379703, "compression_loss": 0.0, "distillation_loss": 0.024126257747411728, "epoch": 6.92, "learning_rate": 1.9216617468625908e-08, "loss": 0.0239, "step": 7284, "task_loss": 0.022267458960413933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997521668028604, "compression_loss": 0.0, "distillation_loss": 0.1891712099313736, "epoch": 6.92, "learning_rate": 1.8801187394248965e-08, "loss": 0.1828, "step": 7285, "task_loss": 0.12499140202999115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799752818423553, "compression_loss": 0.0, "distillation_loss": 0.02490517869591713, "epoch": 6.92, "learning_rate": 1.8390295274944846e-08, "loss": 0.0272, "step": 7286, "task_loss": 0.048274118453264236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997534689010516, "compression_loss": 0.0, "distillation_loss": 0.031054774299263954, "epoch": 6.92, "learning_rate": 1.7983941185356622e-08, "loss": 0.029, "step": 7287, "task_loss": 0.010997863486409187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.79975411823636, "compression_loss": 0.0, "distillation_loss": 0.02562297135591507, "epoch": 6.92, "learning_rate": 1.7582125199303023e-08, "loss": 0.0392, "step": 7288, "task_loss": 0.16150373220443726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997547664304819, "compression_loss": 0.0, "distillation_loss": 0.06376007199287415, "epoch": 6.92, "learning_rate": 1.7184847389770108e-08, "loss": 0.0598, "step": 7289, "task_loss": 0.02367803268134594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997554134844209, "compression_loss": 0.0, "distillation_loss": 0.014210294932126999, "epoch": 6.92, "learning_rate": 1.679210782892793e-08, "loss": 0.0297, "step": 7290, "task_loss": 0.1686396598815918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997560593991807, "compression_loss": 0.0, "distillation_loss": 0.02178233675658703, "epoch": 6.92, "learning_rate": 1.6403906588122185e-08, "loss": 0.0201, "step": 7291, "task_loss": 0.005438664928078651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799756704175765, "compression_loss": 0.0, "distillation_loss": 0.055342864245176315, "epoch": 6.92, "learning_rate": 1.6020243737865927e-08, "loss": 0.0638, "step": 7292, "task_loss": 0.1397797167301178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997573478151776, "compression_loss": 0.0, "distillation_loss": 0.025284867733716965, "epoch": 6.93, "learning_rate": 1.5641119347858947e-08, "loss": 0.024, "step": 7293, "task_loss": 0.012278718873858452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799757990318422, "compression_loss": 0.0, "distillation_loss": 0.013603068888187408, "epoch": 6.93, "learning_rate": 1.526653348696838e-08, "loss": 0.0152, "step": 7294, "task_loss": 0.029902005568146706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799758631686502, "compression_loss": 0.0, "distillation_loss": 0.042981285601854324, "epoch": 6.93, "learning_rate": 1.4896486223239802e-08, "loss": 0.0405, "step": 7295, "task_loss": 0.0184940155595541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997592719204212, "compression_loss": 0.0, "distillation_loss": 0.02033732831478119, "epoch": 6.93, "learning_rate": 1.4530977623891662e-08, "loss": 0.0188, "step": 7296, "task_loss": 0.005222789943218231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997599110211833, "compression_loss": 0.0, "distillation_loss": 0.03475763648748398, "epoch": 6.93, "learning_rate": 1.4170007755326398e-08, "loss": 0.0357, "step": 7297, "task_loss": 0.04378265514969826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799760548989792, "compression_loss": 0.0, "distillation_loss": 0.03861427307128906, "epoch": 6.93, "learning_rate": 1.3813576683111006e-08, "loss": 0.0449, "step": 7298, "task_loss": 0.10126301646232605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799761185827251, "compression_loss": 0.0, "distillation_loss": 0.017223935574293137, "epoch": 6.93, "learning_rate": 1.3461684471993696e-08, "loss": 0.0219, "step": 7299, "task_loss": 0.06419990956783295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799761821534564, "compression_loss": 0.0, "distillation_loss": 0.053291283547878265, "epoch": 6.93, "learning_rate": 1.3114331185898331e-08, "loss": 0.0727, "step": 7300, "task_loss": 0.24761205911636353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997624561127346, "compression_loss": 0.0, "distillation_loss": 0.0634729415178299, "epoch": 6.93, "learning_rate": 1.2771516887921664e-08, "loss": 0.0634, "step": 7301, "task_loss": 0.0631205290555954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997630895627664, "compression_loss": 0.0, "distillation_loss": 0.018962126225233078, "epoch": 6.93, "learning_rate": 1.2433241640338878e-08, "loss": 0.03, "step": 7302, "task_loss": 0.12886041402816772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997637218856634, "compression_loss": 0.0, "distillation_loss": 0.16295194625854492, "epoch": 6.94, "learning_rate": 1.2099505504600817e-08, "loss": 0.169, "step": 7303, "task_loss": 0.22337132692337036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799764353082429, "compression_loss": 0.0, "distillation_loss": 0.027407968416810036, "epoch": 6.94, "learning_rate": 1.1770308541328434e-08, "loss": 0.0343, "step": 7304, "task_loss": 0.09634220600128174 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997649831540671, "compression_loss": 0.0, "distillation_loss": 0.023027902469038963, "epoch": 6.94, "learning_rate": 1.1445650810326668e-08, "loss": 0.0246, "step": 7305, "task_loss": 0.038522087037563324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997656121015811, "compression_loss": 0.0, "distillation_loss": 0.10117096453905106, "epoch": 6.94, "learning_rate": 1.1125532370567793e-08, "loss": 0.1038, "step": 7306, "task_loss": 0.12748871743679047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997662399259748, "compression_loss": 0.0, "distillation_loss": 0.029107600450515747, "epoch": 6.94, "learning_rate": 1.0809953280202511e-08, "loss": 0.0362, "step": 7307, "task_loss": 0.09982933849096298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997668666282521, "compression_loss": 0.0, "distillation_loss": 0.09483398497104645, "epoch": 6.94, "learning_rate": 1.0498913596559967e-08, "loss": 0.09, "step": 7308, "task_loss": 0.04666196554899216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997674922094165, "compression_loss": 0.0, "distillation_loss": 0.025720832869410515, "epoch": 6.94, "learning_rate": 1.0192413376139408e-08, "loss": 0.0305, "step": 7309, "task_loss": 0.07334055751562119 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997681166704715, "compression_loss": 0.0, "distillation_loss": 0.0329585038125515, "epoch": 6.94, "learning_rate": 9.890452674618522e-09, "loss": 0.0317, "step": 7310, "task_loss": 0.020583661273121834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799768740012421, "compression_loss": 0.0, "distillation_loss": 0.021503139287233353, "epoch": 6.94, "learning_rate": 9.593031546853426e-09, "loss": 0.0232, "step": 7311, "task_loss": 0.03804744780063629 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997693622362688, "compression_loss": 0.0, "distillation_loss": 0.053460683673620224, "epoch": 6.94, "learning_rate": 9.300150046864796e-09, "loss": 0.0505, "step": 7312, "task_loss": 0.02390519343316555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997699833430184, "compression_loss": 0.0, "distillation_loss": 0.01604924164712429, "epoch": 6.94, "learning_rate": 9.011808227865625e-09, "loss": 0.0208, "step": 7313, "task_loss": 0.06401699781417847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997706033336734, "compression_loss": 0.0, "distillation_loss": 0.012479826807975769, "epoch": 6.95, "learning_rate": 8.728006142225131e-09, "loss": 0.0118, "step": 7314, "task_loss": 0.005266919732093811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997712222092377, "compression_loss": 0.0, "distillation_loss": 0.05143355578184128, "epoch": 6.95, "learning_rate": 8.448743841504847e-09, "loss": 0.0551, "step": 7315, "task_loss": 0.08826296031475067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997718399707148, "compression_loss": 0.0, "distillation_loss": 0.06294449418783188, "epoch": 6.95, "learning_rate": 8.174021376428087e-09, "loss": 0.0601, "step": 7316, "task_loss": 0.03437443822622299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997724566191085, "compression_loss": 0.0, "distillation_loss": 0.08268649876117706, "epoch": 6.95, "learning_rate": 7.903838796904927e-09, "loss": 0.0772, "step": 7317, "task_loss": 0.028297651559114456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997730721554224, "compression_loss": 0.0, "distillation_loss": 0.15433204174041748, "epoch": 6.95, "learning_rate": 7.638196152010002e-09, "loss": 0.1521, "step": 7318, "task_loss": 0.1320251226425171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997736865806603, "compression_loss": 0.0, "distillation_loss": 0.018636398017406464, "epoch": 6.95, "learning_rate": 7.37709349000193e-09, "loss": 0.0172, "step": 7319, "task_loss": 0.0046669188886880875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997742998958257, "compression_loss": 0.0, "distillation_loss": 0.0154360830783844, "epoch": 6.95, "learning_rate": 7.120530858312213e-09, "loss": 0.0198, "step": 7320, "task_loss": 0.05901549756526947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997749121019225, "compression_loss": 0.0, "distillation_loss": 0.014429114758968353, "epoch": 6.95, "learning_rate": 6.8685083035452404e-09, "loss": 0.014, "step": 7321, "task_loss": 0.010067546740174294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997755231999542, "compression_loss": 0.0, "distillation_loss": 0.025817744433879852, "epoch": 6.95, "learning_rate": 6.621025871481057e-09, "loss": 0.0296, "step": 7322, "task_loss": 0.0636395514011383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997761331909246, "compression_loss": 0.0, "distillation_loss": 0.033705998212099075, "epoch": 6.95, "learning_rate": 6.378083607075369e-09, "loss": 0.0338, "step": 7323, "task_loss": 0.03477884456515312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997767420758373, "compression_loss": 0.0, "distillation_loss": 0.03906092047691345, "epoch": 6.96, "learning_rate": 6.139681554462318e-09, "loss": 0.0507, "step": 7324, "task_loss": 0.1549626588821411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997773498556959, "compression_loss": 0.0, "distillation_loss": 0.051231879740953445, "epoch": 6.96, "learning_rate": 5.905819756948927e-09, "loss": 0.0497, "step": 7325, "task_loss": 0.03555578738451004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997779565315044, "compression_loss": 0.0, "distillation_loss": 0.03208434581756592, "epoch": 6.96, "learning_rate": 5.676498257015106e-09, "loss": 0.041, "step": 7326, "task_loss": 0.12134852260351181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997785621042662, "compression_loss": 0.0, "distillation_loss": 0.1095762699842453, "epoch": 6.96, "learning_rate": 5.4517170963191974e-09, "loss": 0.1053, "step": 7327, "task_loss": 0.06731220334768295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799779166574985, "compression_loss": 0.0, "distillation_loss": 0.03222663700580597, "epoch": 6.96, "learning_rate": 5.231476315695205e-09, "loss": 0.0354, "step": 7328, "task_loss": 0.0638594701886177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997797699446646, "compression_loss": 0.0, "distillation_loss": 0.01883310079574585, "epoch": 6.96, "learning_rate": 5.015775955150015e-09, "loss": 0.0177, "step": 7329, "task_loss": 0.007643511518836021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997803722143086, "compression_loss": 0.0, "distillation_loss": 0.018376603722572327, "epoch": 6.96, "learning_rate": 4.804616053866173e-09, "loss": 0.0172, "step": 7330, "task_loss": 0.00702180340886116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997809733849208, "compression_loss": 0.0, "distillation_loss": 0.06521350890398026, "epoch": 6.96, "learning_rate": 4.5979966501991104e-09, "loss": 0.0821, "step": 7331, "task_loss": 0.2336568832397461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997815734575047, "compression_loss": 0.0, "distillation_loss": 0.045030295848846436, "epoch": 6.96, "learning_rate": 4.395917781688242e-09, "loss": 0.0426, "step": 7332, "task_loss": 0.021171823143959045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997821724330642, "compression_loss": 0.0, "distillation_loss": 0.06834699958562851, "epoch": 6.96, "learning_rate": 4.19837948503754e-09, "loss": 0.0635, "step": 7333, "task_loss": 0.019571850076317787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997827703126027, "compression_loss": 0.0, "distillation_loss": 0.039298366755247116, "epoch": 6.96, "learning_rate": 4.0053817961321905e-09, "loss": 0.0437, "step": 7334, "task_loss": 0.08349810540676117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799783367097124, "compression_loss": 0.0, "distillation_loss": 0.04764103144407272, "epoch": 6.97, "learning_rate": 3.8169247500330355e-09, "loss": 0.0447, "step": 7335, "task_loss": 0.01831081695854664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799783962787632, "compression_loss": 0.0, "distillation_loss": 0.05844502151012421, "epoch": 6.97, "learning_rate": 3.633008380971026e-09, "loss": 0.0784, "step": 7336, "task_loss": 0.25827866792678833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997845573851301, "compression_loss": 0.0, "distillation_loss": 0.03211439028382301, "epoch": 6.97, "learning_rate": 3.453632722358324e-09, "loss": 0.0347, "step": 7337, "task_loss": 0.058454521000385284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997851508906221, "compression_loss": 0.0, "distillation_loss": 0.021087612956762314, "epoch": 6.97, "learning_rate": 3.278797806774425e-09, "loss": 0.0199, "step": 7338, "task_loss": 0.009137500077486038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997857433051117, "compression_loss": 0.0, "distillation_loss": 0.07293461263179779, "epoch": 6.97, "learning_rate": 3.1085036659855847e-09, "loss": 0.0815, "step": 7339, "task_loss": 0.15890468657016754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997863346296024, "compression_loss": 0.0, "distillation_loss": 0.04177660495042801, "epoch": 6.97, "learning_rate": 2.9427503309226166e-09, "loss": 0.0499, "step": 7340, "task_loss": 0.12267521023750305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997869248650982, "compression_loss": 0.0, "distillation_loss": 0.06666393578052521, "epoch": 6.97, "learning_rate": 2.7815378316947694e-09, "loss": 0.0758, "step": 7341, "task_loss": 0.15778383612632751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997875140126025, "compression_loss": 0.0, "distillation_loss": 0.04870596528053284, "epoch": 6.97, "learning_rate": 2.624866197589726e-09, "loss": 0.046, "step": 7342, "task_loss": 0.021872013807296753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997881020731191, "compression_loss": 0.0, "distillation_loss": 0.02343062311410904, "epoch": 6.97, "learning_rate": 2.4727354570680537e-09, "loss": 0.0323, "step": 7343, "task_loss": 0.11193156987428665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997886890476517, "compression_loss": 0.0, "distillation_loss": 0.0678223967552185, "epoch": 6.97, "learning_rate": 2.3251456377604288e-09, "loss": 0.0726, "step": 7344, "task_loss": 0.1155007928609848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799789274937204, "compression_loss": 0.0, "distillation_loss": 0.11146111786365509, "epoch": 6.98, "learning_rate": 2.1820967664815116e-09, "loss": 0.1153, "step": 7345, "task_loss": 0.15025877952575684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997898597427796, "compression_loss": 0.0, "distillation_loss": 0.058320388197898865, "epoch": 6.98, "learning_rate": 2.043588869216073e-09, "loss": 0.0701, "step": 7346, "task_loss": 0.17589399218559265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997904434653822, "compression_loss": 0.0, "distillation_loss": 0.03350623697042465, "epoch": 6.98, "learning_rate": 1.9096219711245423e-09, "loss": 0.0307, "step": 7347, "task_loss": 0.005610005930066109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997910261060155, "compression_loss": 0.0, "distillation_loss": 0.02051948755979538, "epoch": 6.98, "learning_rate": 1.780196096540232e-09, "loss": 0.0246, "step": 7348, "task_loss": 0.06097707524895668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997916076656832, "compression_loss": 0.0, "distillation_loss": 0.021704774349927902, "epoch": 6.98, "learning_rate": 1.6553112689776662e-09, "loss": 0.0294, "step": 7349, "task_loss": 0.0981798768043518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799792188145389, "compression_loss": 0.0, "distillation_loss": 0.10756382346153259, "epoch": 6.98, "learning_rate": 1.5349675111214768e-09, "loss": 0.1035, "step": 7350, "task_loss": 0.06680326163768768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997927675461366, "compression_loss": 0.0, "distillation_loss": 0.06751997768878937, "epoch": 6.98, "learning_rate": 1.419164844831955e-09, "loss": 0.0741, "step": 7351, "task_loss": 0.13287892937660217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997933458689296, "compression_loss": 0.0, "distillation_loss": 0.028206095099449158, "epoch": 6.98, "learning_rate": 1.3079032911450516e-09, "loss": 0.0329, "step": 7352, "task_loss": 0.07551902532577515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997939231147716, "compression_loss": 0.0, "distillation_loss": 0.0961083471775055, "epoch": 6.98, "learning_rate": 1.201182870272377e-09, "loss": 0.0902, "step": 7353, "task_loss": 0.03751200810074806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997944992846665, "compression_loss": 0.0, "distillation_loss": 0.01709740236401558, "epoch": 6.98, "learning_rate": 1.0990036016012005e-09, "loss": 0.0161, "step": 7354, "task_loss": 0.00756429135799408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997950743796178, "compression_loss": 0.0, "distillation_loss": 0.024914680048823357, "epoch": 6.98, "learning_rate": 1.0013655036916759e-09, "loss": 0.0318, "step": 7355, "task_loss": 0.094090037047863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997956484006293, "compression_loss": 0.0, "distillation_loss": 0.013773511163890362, "epoch": 6.99, "learning_rate": 9.082685942796154e-10, "loss": 0.0129, "step": 7356, "task_loss": 0.004718998447060585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997962213487048, "compression_loss": 0.0, "distillation_loss": 0.020373770967125893, "epoch": 6.99, "learning_rate": 8.197128902792672e-10, "loss": 0.0326, "step": 7357, "task_loss": 0.1427568942308426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997967932248476, "compression_loss": 0.0, "distillation_loss": 0.08715569972991943, "epoch": 6.99, "learning_rate": 7.356984077722117e-10, "loss": 0.0883, "step": 7358, "task_loss": 0.09825003892183304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997973640300617, "compression_loss": 0.0, "distillation_loss": 0.03974455967545509, "epoch": 6.99, "learning_rate": 6.562251620267912e-10, "loss": 0.0361, "step": 7359, "task_loss": 0.0036314092576503754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997979337653507, "compression_loss": 0.0, "distillation_loss": 0.10460391640663147, "epoch": 6.99, "learning_rate": 5.812931674759048e-10, "loss": 0.1047, "step": 7360, "task_loss": 0.10604080557823181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997985024317182, "compression_loss": 0.0, "distillation_loss": 0.020527970045804977, "epoch": 6.99, "learning_rate": 5.109024377308869e-10, "loss": 0.0209, "step": 7361, "task_loss": 0.02422548085451126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799799070030168, "compression_loss": 0.0, "distillation_loss": 0.09669515490531921, "epoch": 6.99, "learning_rate": 4.450529855787311e-10, "loss": 0.0906, "step": 7362, "task_loss": 0.03620155528187752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7997996365617038, "compression_loss": 0.0, "distillation_loss": 0.026915445923805237, "epoch": 6.99, "learning_rate": 3.8374482298209057e-10, "loss": 0.0248, "step": 7363, "task_loss": 0.005302935838699341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998002020273292, "compression_loss": 0.0, "distillation_loss": 0.028947005048394203, "epoch": 6.99, "learning_rate": 3.2697796107650226e-10, "loss": 0.0354, "step": 7364, "task_loss": 0.09320739656686783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998007664280478, "compression_loss": 0.0, "distillation_loss": 0.013993775472044945, "epoch": 6.99, "learning_rate": 2.7475241017871355e-10, "loss": 0.0266, "step": 7365, "task_loss": 0.1398152858018875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998013297648634, "compression_loss": 0.0, "distillation_loss": 0.0770372524857521, "epoch": 7.0, "learning_rate": 2.2706817977002914e-10, "loss": 0.0781, "step": 7366, "task_loss": 0.0880659967660904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998018920387797, "compression_loss": 0.0, "distillation_loss": 0.052641429007053375, "epoch": 7.0, "learning_rate": 1.8392527851296415e-10, "loss": 0.0551, "step": 7367, "task_loss": 0.0774116963148117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998024532508003, "compression_loss": 0.0, "distillation_loss": 0.05743606388568878, "epoch": 7.0, "learning_rate": 1.453237142484687e-10, "loss": 0.0612, "step": 7368, "task_loss": 0.09504348039627075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.799803013401929, "compression_loss": 0.0, "distillation_loss": 0.026639528572559357, "epoch": 7.0, "learning_rate": 1.112634939848256e-10, "loss": 0.0321, "step": 7369, "task_loss": 0.08151736855506897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998035724931695, "compression_loss": 0.0, "distillation_loss": 0.03500088304281235, "epoch": 7.0, "learning_rate": 8.174462391430382e-11, "loss": 0.0384, "step": 7370, "task_loss": 0.06867900490760803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6237196162521903, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7984375187077579, "compression/magnitude_sparsity/target_sparsity_level": 0.7998041305255252, "compression_loss": 0.0, "distillation_loss": 0.012465332634747028, "epoch": 7.0, "learning_rate": 5.676710939372942e-11, "loss": 0.0127, "step": 7371, "task_loss": 0.014598931185901165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998046875, "compression_loss": 0.0, "distillation_loss": 0.09104140102863312, "epoch": 7.0, "learning_rate": 3.633095496113903e-11, "loss": 0.0992, "step": 7372, "task_loss": 0.17217549681663513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998052434175976, "compression_loss": 0.0, "distillation_loss": 0.07841981947422028, "epoch": 7.0, "learning_rate": 2.0436164330228658e-11, "loss": 0.0708, "step": 7373, "task_loss": 0.0018375962972640991 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998057982793217, "compression_loss": 0.0, "distillation_loss": 0.08232280611991882, "epoch": 7.0, "learning_rate": 9.082740390353728e-12, "loss": 0.0767, "step": 7374, "task_loss": 0.026533475145697594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998063520861759, "compression_loss": 0.0, "distillation_loss": 0.07200402021408081, "epoch": 7.0, "learning_rate": 2.270685200977951e-12, "loss": 0.0776, "step": 7375, "task_loss": 0.12824085354804993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998069048391638, "compression_loss": 0.0, "distillation_loss": 0.19095724821090698, "epoch": 7.0, "learning_rate": 5e-05, "loss": 0.1839, "step": 7376, "task_loss": 0.12048640847206116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998074565392893, "compression_loss": 0.0, "distillation_loss": 0.04641976207494736, "epoch": 7.01, "learning_rate": 4.99999977293148e-05, "loss": 0.05, "step": 7377, "task_loss": 0.0825609490275383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998080071875558, "compression_loss": 0.0, "distillation_loss": 0.1098412275314331, "epoch": 7.01, "learning_rate": 4.999999091725961e-05, "loss": 0.111, "step": 7378, "task_loss": 0.12164577841758728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998085567849673, "compression_loss": 0.0, "distillation_loss": 0.07424164563417435, "epoch": 7.01, "learning_rate": 4.999997956383567e-05, "loss": 0.0833, "step": 7379, "task_loss": 0.16513806581497192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998091053325274, "compression_loss": 0.0, "distillation_loss": 0.08631199598312378, "epoch": 7.01, "learning_rate": 4.999996366904504e-05, "loss": 0.0779, "step": 7380, "task_loss": 0.002591833472251892 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998096528312395, "compression_loss": 0.0, "distillation_loss": 0.09567269682884216, "epoch": 7.01, "learning_rate": 4.999994323289061e-05, "loss": 0.0913, "step": 7381, "task_loss": 0.05151829496026039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998101992821076, "compression_loss": 0.0, "distillation_loss": 0.0753033235669136, "epoch": 7.01, "learning_rate": 4.999991825537609e-05, "loss": 0.0694, "step": 7382, "task_loss": 0.016505436971783638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998107446861353, "compression_loss": 0.0, "distillation_loss": 0.038341403007507324, "epoch": 7.01, "learning_rate": 4.999988873650602e-05, "loss": 0.0354, "step": 7383, "task_loss": 0.008481014519929886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998112890443263, "compression_loss": 0.0, "distillation_loss": 0.04590607061982155, "epoch": 7.01, "learning_rate": 4.999985467628575e-05, "loss": 0.0532, "step": 7384, "task_loss": 0.11850175261497498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998118323576842, "compression_loss": 0.0, "distillation_loss": 0.06865381449460983, "epoch": 7.01, "learning_rate": 4.999981607472149e-05, "loss": 0.0665, "step": 7385, "task_loss": 0.046819057315588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998123746272127, "compression_loss": 0.0, "distillation_loss": 0.06713886559009552, "epoch": 7.01, "learning_rate": 4.999977293182023e-05, "loss": 0.0622, "step": 7386, "task_loss": 0.018027808517217636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998129158539155, "compression_loss": 0.0, "distillation_loss": 0.03523305803537369, "epoch": 7.02, "learning_rate": 4.999972524758982e-05, "loss": 0.0459, "step": 7387, "task_loss": 0.14175420999526978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998134560387963, "compression_loss": 0.0, "distillation_loss": 0.10418183356523514, "epoch": 7.02, "learning_rate": 4.999967302203893e-05, "loss": 0.106, "step": 7388, "task_loss": 0.12198633700609207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998139951828587, "compression_loss": 0.0, "distillation_loss": 0.02372100204229355, "epoch": 7.02, "learning_rate": 4.9999616255177016e-05, "loss": 0.0308, "step": 7389, "task_loss": 0.09421825408935547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998145332871065, "compression_loss": 0.0, "distillation_loss": 0.028630398213863373, "epoch": 7.02, "learning_rate": 4.999955494701443e-05, "loss": 0.0346, "step": 7390, "task_loss": 0.08789223432540894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998150703525434, "compression_loss": 0.0, "distillation_loss": 0.14551189541816711, "epoch": 7.02, "learning_rate": 4.999948909756227e-05, "loss": 0.1518, "step": 7391, "task_loss": 0.20824161171913147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998156063801729, "compression_loss": 0.0, "distillation_loss": 0.13446444272994995, "epoch": 7.02, "learning_rate": 4.9999418706832525e-05, "loss": 0.1363, "step": 7392, "task_loss": 0.15303920209407806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998161413709989, "compression_loss": 0.0, "distillation_loss": 0.025591343641281128, "epoch": 7.02, "learning_rate": 4.9999343774837976e-05, "loss": 0.0238, "step": 7393, "task_loss": 0.007202155888080597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799816675326025, "compression_loss": 0.0, "distillation_loss": 0.03282269090414047, "epoch": 7.02, "learning_rate": 4.999926430159223e-05, "loss": 0.031, "step": 7394, "task_loss": 0.014277882874011993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998172082462548, "compression_loss": 0.0, "distillation_loss": 0.05018794909119606, "epoch": 7.02, "learning_rate": 4.9999180287109725e-05, "loss": 0.0654, "step": 7395, "task_loss": 0.20274071395397186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799817740132692, "compression_loss": 0.0, "distillation_loss": 0.027372337877750397, "epoch": 7.02, "learning_rate": 4.999909173140572e-05, "loss": 0.0256, "step": 7396, "task_loss": 0.01010635495185852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998182709863404, "compression_loss": 0.0, "distillation_loss": 0.02192852459847927, "epoch": 7.02, "learning_rate": 4.999899863449631e-05, "loss": 0.0256, "step": 7397, "task_loss": 0.05868425592780113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998188008082036, "compression_loss": 0.0, "distillation_loss": 0.06173873692750931, "epoch": 7.03, "learning_rate": 4.99989009963984e-05, "loss": 0.0675, "step": 7398, "task_loss": 0.1194574236869812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998193295992853, "compression_loss": 0.0, "distillation_loss": 0.06959561258554459, "epoch": 7.03, "learning_rate": 4.999879881712973e-05, "loss": 0.0697, "step": 7399, "task_loss": 0.07095710933208466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998198573605891, "compression_loss": 0.0, "distillation_loss": 0.08823947608470917, "epoch": 7.03, "learning_rate": 4.999869209670885e-05, "loss": 0.0889, "step": 7400, "task_loss": 0.0944376289844513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998203840931188, "compression_loss": 0.0, "distillation_loss": 0.03558321297168732, "epoch": 7.03, "learning_rate": 4.999858083515517e-05, "loss": 0.0466, "step": 7401, "task_loss": 0.14564195275306702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799820909797878, "compression_loss": 0.0, "distillation_loss": 0.1047942191362381, "epoch": 7.03, "learning_rate": 4.999846503248888e-05, "loss": 0.1106, "step": 7402, "task_loss": 0.16298425197601318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998214344758705, "compression_loss": 0.0, "distillation_loss": 0.03409132733941078, "epoch": 7.03, "learning_rate": 4.9998344688731027e-05, "loss": 0.0435, "step": 7403, "task_loss": 0.12837141752243042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998219581280999, "compression_loss": 0.0, "distillation_loss": 0.025968341156840324, "epoch": 7.03, "learning_rate": 4.999821980390346e-05, "loss": 0.0237, "step": 7404, "task_loss": 0.003702618181705475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998224807555698, "compression_loss": 0.0, "distillation_loss": 0.04202386364340782, "epoch": 7.03, "learning_rate": 4.999809037802888e-05, "loss": 0.0391, "step": 7405, "task_loss": 0.013203632086515427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799823002359284, "compression_loss": 0.0, "distillation_loss": 0.020798005163669586, "epoch": 7.03, "learning_rate": 4.999795641113079e-05, "loss": 0.0203, "step": 7406, "task_loss": 0.016108643263578415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998235229402462, "compression_loss": 0.0, "distillation_loss": 0.04067230597138405, "epoch": 7.03, "learning_rate": 4.9997817903233527e-05, "loss": 0.0412, "step": 7407, "task_loss": 0.04560984671115875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998240424994599, "compression_loss": 0.0, "distillation_loss": 0.03840542584657669, "epoch": 7.04, "learning_rate": 4.999767485436224e-05, "loss": 0.0402, "step": 7408, "task_loss": 0.05598912388086319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998245610379291, "compression_loss": 0.0, "distillation_loss": 0.028701873496174812, "epoch": 7.04, "learning_rate": 4.999752726454293e-05, "loss": 0.0274, "step": 7409, "task_loss": 0.015496550127863884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998250785566572, "compression_loss": 0.0, "distillation_loss": 0.03353622183203697, "epoch": 7.04, "learning_rate": 4.9997375133802415e-05, "loss": 0.0376, "step": 7410, "task_loss": 0.07444252073764801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799825595056648, "compression_loss": 0.0, "distillation_loss": 0.021153662353754044, "epoch": 7.04, "learning_rate": 4.999721846216831e-05, "loss": 0.0196, "step": 7411, "task_loss": 0.005829468369483948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998261105389051, "compression_loss": 0.0, "distillation_loss": 0.059076711535453796, "epoch": 7.04, "learning_rate": 4.999705724966908e-05, "loss": 0.0687, "step": 7412, "task_loss": 0.15493744611740112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998266250044322, "compression_loss": 0.0, "distillation_loss": 0.06167703866958618, "epoch": 7.04, "learning_rate": 4.999689149633402e-05, "loss": 0.0608, "step": 7413, "task_loss": 0.05248799920082092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799827138454233, "compression_loss": 0.0, "distillation_loss": 0.02228119783103466, "epoch": 7.04, "learning_rate": 4.999672120219323e-05, "loss": 0.0276, "step": 7414, "task_loss": 0.07509797811508179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998276508893114, "compression_loss": 0.0, "distillation_loss": 0.03827614709734917, "epoch": 7.04, "learning_rate": 4.999654636727764e-05, "loss": 0.0452, "step": 7415, "task_loss": 0.10720621794462204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998281623106708, "compression_loss": 0.0, "distillation_loss": 0.10008041560649872, "epoch": 7.04, "learning_rate": 4.9996366991619034e-05, "loss": 0.0968, "step": 7416, "task_loss": 0.06756041944026947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998286727193149, "compression_loss": 0.0, "distillation_loss": 0.04735985025763512, "epoch": 7.04, "learning_rate": 4.999618307524997e-05, "loss": 0.0456, "step": 7417, "task_loss": 0.02975446730852127 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998291821162474, "compression_loss": 0.0, "distillation_loss": 0.02187761291861534, "epoch": 7.04, "learning_rate": 4.999599461820387e-05, "loss": 0.0201, "step": 7418, "task_loss": 0.0042492058128118515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998296905024722, "compression_loss": 0.0, "distillation_loss": 0.020126711577177048, "epoch": 7.05, "learning_rate": 4.999580162051497e-05, "loss": 0.0187, "step": 7419, "task_loss": 0.005694573745131493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998301978789928, "compression_loss": 0.0, "distillation_loss": 0.02660163678228855, "epoch": 7.05, "learning_rate": 4.9995604082218314e-05, "loss": 0.0327, "step": 7420, "task_loss": 0.08736631274223328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998307042468128, "compression_loss": 0.0, "distillation_loss": 0.05452951416373253, "epoch": 7.05, "learning_rate": 4.99954020033498e-05, "loss": 0.0549, "step": 7421, "task_loss": 0.058631766587495804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799831209606936, "compression_loss": 0.0, "distillation_loss": 0.0896400734782219, "epoch": 7.05, "learning_rate": 4.9995195383946135e-05, "loss": 0.0861, "step": 7422, "task_loss": 0.053857218474149704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998317139603661, "compression_loss": 0.0, "distillation_loss": 0.026753665879368782, "epoch": 7.05, "learning_rate": 4.999498422404485e-05, "loss": 0.0279, "step": 7423, "task_loss": 0.03794853016734123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998322173081067, "compression_loss": 0.0, "distillation_loss": 0.023596402257680893, "epoch": 7.05, "learning_rate": 4.999476852368431e-05, "loss": 0.0243, "step": 7424, "task_loss": 0.030923640355467796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998327196511615, "compression_loss": 0.0, "distillation_loss": 0.060296621173620224, "epoch": 7.05, "learning_rate": 4.999454828290369e-05, "loss": 0.0597, "step": 7425, "task_loss": 0.05454102158546448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998332209905342, "compression_loss": 0.0, "distillation_loss": 0.0592413991689682, "epoch": 7.05, "learning_rate": 4.999432350174299e-05, "loss": 0.0575, "step": 7426, "task_loss": 0.04156405106186867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998337213272286, "compression_loss": 0.0, "distillation_loss": 0.07152185589075089, "epoch": 7.05, "learning_rate": 4.9994094180243055e-05, "loss": 0.0712, "step": 7427, "task_loss": 0.0684933215379715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998342206622482, "compression_loss": 0.0, "distillation_loss": 0.0811118632555008, "epoch": 7.05, "learning_rate": 4.999386031844554e-05, "loss": 0.0814, "step": 7428, "task_loss": 0.08375652134418488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998347189965967, "compression_loss": 0.0, "distillation_loss": 0.0416887030005455, "epoch": 7.06, "learning_rate": 4.999362191639293e-05, "loss": 0.0431, "step": 7429, "task_loss": 0.05558731034398079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799835216331278, "compression_loss": 0.0, "distillation_loss": 0.04246686398983002, "epoch": 7.06, "learning_rate": 4.999337897412852e-05, "loss": 0.0439, "step": 7430, "task_loss": 0.05704366788268089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998357126672955, "compression_loss": 0.0, "distillation_loss": 0.028290575370192528, "epoch": 7.06, "learning_rate": 4.999313149169645e-05, "loss": 0.0431, "step": 7431, "task_loss": 0.17606376111507416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799836208005653, "compression_loss": 0.0, "distillation_loss": 0.08979344367980957, "epoch": 7.06, "learning_rate": 4.999287946914169e-05, "loss": 0.0868, "step": 7432, "task_loss": 0.06011103093624115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998367023473542, "compression_loss": 0.0, "distillation_loss": 0.06186497583985329, "epoch": 7.06, "learning_rate": 4.999262290651e-05, "loss": 0.0586, "step": 7433, "task_loss": 0.02930166944861412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998371956934027, "compression_loss": 0.0, "distillation_loss": 0.022470489144325256, "epoch": 7.06, "learning_rate": 4.9992361803847995e-05, "loss": 0.0208, "step": 7434, "task_loss": 0.005419734865427017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998376880448024, "compression_loss": 0.0, "distillation_loss": 0.056562915444374084, "epoch": 7.06, "learning_rate": 4.99920961612031e-05, "loss": 0.0685, "step": 7435, "task_loss": 0.17543764412403107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998381794025567, "compression_loss": 0.0, "distillation_loss": 0.05138598382472992, "epoch": 7.06, "learning_rate": 4.9991825978623574e-05, "loss": 0.0542, "step": 7436, "task_loss": 0.07962413132190704 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998386697676695, "compression_loss": 0.0, "distillation_loss": 0.015319476835429668, "epoch": 7.06, "learning_rate": 4.9991551256158495e-05, "loss": 0.0277, "step": 7437, "task_loss": 0.13944551348686218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998391591411443, "compression_loss": 0.0, "distillation_loss": 0.01975143700838089, "epoch": 7.06, "learning_rate": 4.999127199385778e-05, "loss": 0.0257, "step": 7438, "task_loss": 0.07912784814834595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799839647523985, "compression_loss": 0.0, "distillation_loss": 0.07420433312654495, "epoch": 7.06, "learning_rate": 4.999098819177214e-05, "loss": 0.0816, "step": 7439, "task_loss": 0.14786875247955322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799840134917195, "compression_loss": 0.0, "distillation_loss": 0.027095727622509003, "epoch": 7.07, "learning_rate": 4.999069984995314e-05, "loss": 0.0319, "step": 7440, "task_loss": 0.07478499412536621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998406213217781, "compression_loss": 0.0, "distillation_loss": 0.04266301542520523, "epoch": 7.07, "learning_rate": 4.999040696845315e-05, "loss": 0.0465, "step": 7441, "task_loss": 0.08147358894348145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998411067387382, "compression_loss": 0.0, "distillation_loss": 0.021399151533842087, "epoch": 7.07, "learning_rate": 4.999010954732538e-05, "loss": 0.026, "step": 7442, "task_loss": 0.06721732020378113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998415911690787, "compression_loss": 0.0, "distillation_loss": 0.04915117844939232, "epoch": 7.07, "learning_rate": 4.998980758662386e-05, "loss": 0.0452, "step": 7443, "task_loss": 0.009514054283499718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998420746138034, "compression_loss": 0.0, "distillation_loss": 0.08443302661180496, "epoch": 7.07, "learning_rate": 4.998950108640345e-05, "loss": 0.0882, "step": 7444, "task_loss": 0.12241680175065994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799842557073916, "compression_loss": 0.0, "distillation_loss": 0.03232605382800102, "epoch": 7.07, "learning_rate": 4.99891900467198e-05, "loss": 0.0313, "step": 7445, "task_loss": 0.022218016907572746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998430385504202, "compression_loss": 0.0, "distillation_loss": 0.032280419021844864, "epoch": 7.07, "learning_rate": 4.9988874467629435e-05, "loss": 0.0345, "step": 7446, "task_loss": 0.05450471490621567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998435190443195, "compression_loss": 0.0, "distillation_loss": 0.027930978685617447, "epoch": 7.07, "learning_rate": 4.998855434918968e-05, "loss": 0.0262, "step": 7447, "task_loss": 0.010617373511195183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998439985566178, "compression_loss": 0.0, "distillation_loss": 0.06132368743419647, "epoch": 7.07, "learning_rate": 4.998822969145868e-05, "loss": 0.0739, "step": 7448, "task_loss": 0.18746685981750488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998444770883186, "compression_loss": 0.0, "distillation_loss": 0.03330961614847183, "epoch": 7.07, "learning_rate": 4.99879004944954e-05, "loss": 0.0313, "step": 7449, "task_loss": 0.012728353962302208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998449546404258, "compression_loss": 0.0, "distillation_loss": 0.018877487629652023, "epoch": 7.08, "learning_rate": 4.998756675835966e-05, "loss": 0.0173, "step": 7450, "task_loss": 0.0031468812376260757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998454312139429, "compression_loss": 0.0, "distillation_loss": 0.08477997779846191, "epoch": 7.08, "learning_rate": 4.9987228483112083e-05, "loss": 0.0814, "step": 7451, "task_loss": 0.05088125914335251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998459068098737, "compression_loss": 0.0, "distillation_loss": 0.05044269561767578, "epoch": 7.08, "learning_rate": 4.998688566881411e-05, "loss": 0.0471, "step": 7452, "task_loss": 0.016654808074235916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998463814292217, "compression_loss": 0.0, "distillation_loss": 0.04332347959280014, "epoch": 7.08, "learning_rate": 4.998653831552801e-05, "loss": 0.0424, "step": 7453, "task_loss": 0.034489430487155914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998468550729908, "compression_loss": 0.0, "distillation_loss": 0.09742948412895203, "epoch": 7.08, "learning_rate": 4.998618642331689e-05, "loss": 0.1015, "step": 7454, "task_loss": 0.13810521364212036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998473277421846, "compression_loss": 0.0, "distillation_loss": 0.06396178901195526, "epoch": 7.08, "learning_rate": 4.9985829992244675e-05, "loss": 0.0684, "step": 7455, "task_loss": 0.10790328681468964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998477994378067, "compression_loss": 0.0, "distillation_loss": 0.04675624519586563, "epoch": 7.08, "learning_rate": 4.998546902237611e-05, "loss": 0.0458, "step": 7456, "task_loss": 0.03740784153342247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998482701608609, "compression_loss": 0.0, "distillation_loss": 0.018789753317832947, "epoch": 7.08, "learning_rate": 4.9985103513776764e-05, "loss": 0.0245, "step": 7457, "task_loss": 0.07571886479854584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998487399123507, "compression_loss": 0.0, "distillation_loss": 0.06566867977380753, "epoch": 7.08, "learning_rate": 4.998473346651303e-05, "loss": 0.0631, "step": 7458, "task_loss": 0.03988581523299217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998492086932801, "compression_loss": 0.0, "distillation_loss": 0.03311315178871155, "epoch": 7.08, "learning_rate": 4.9984358880652146e-05, "loss": 0.0408, "step": 7459, "task_loss": 0.1103094071149826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998496765046526, "compression_loss": 0.0, "distillation_loss": 0.11287416517734528, "epoch": 7.08, "learning_rate": 4.9983979756262136e-05, "loss": 0.1219, "step": 7460, "task_loss": 0.20322994887828827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998501433474717, "compression_loss": 0.0, "distillation_loss": 0.026843059808015823, "epoch": 7.09, "learning_rate": 4.998359609341188e-05, "loss": 0.037, "step": 7461, "task_loss": 0.12811830639839172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998506092227414, "compression_loss": 0.0, "distillation_loss": 0.06953533738851547, "epoch": 7.09, "learning_rate": 4.9983207892171074e-05, "loss": 0.0673, "step": 7462, "task_loss": 0.04702718183398247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998510741314653, "compression_loss": 0.0, "distillation_loss": 0.05555496737360954, "epoch": 7.09, "learning_rate": 4.998281515261023e-05, "loss": 0.0607, "step": 7463, "task_loss": 0.10690201073884964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998515380746469, "compression_loss": 0.0, "distillation_loss": 0.025969993323087692, "epoch": 7.09, "learning_rate": 4.9982417874800704e-05, "loss": 0.0266, "step": 7464, "task_loss": 0.03247838839888573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79985200105329, "compression_loss": 0.0, "distillation_loss": 0.057906825095415115, "epoch": 7.09, "learning_rate": 4.998201605881465e-05, "loss": 0.0635, "step": 7465, "task_loss": 0.1140889823436737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998524630683984, "compression_loss": 0.0, "distillation_loss": 0.019549962133169174, "epoch": 7.09, "learning_rate": 4.9981609704725057e-05, "loss": 0.0261, "step": 7466, "task_loss": 0.08523699641227722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998529241209756, "compression_loss": 0.0, "distillation_loss": 0.14493508636951447, "epoch": 7.09, "learning_rate": 4.998119881260576e-05, "loss": 0.1549, "step": 7467, "task_loss": 0.244978129863739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998533842120253, "compression_loss": 0.0, "distillation_loss": 0.10177647322416306, "epoch": 7.09, "learning_rate": 4.9980783382531376e-05, "loss": 0.1153, "step": 7468, "task_loss": 0.23712855577468872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998538433425514, "compression_loss": 0.0, "distillation_loss": 0.0323081910610199, "epoch": 7.09, "learning_rate": 4.998036341457739e-05, "loss": 0.0372, "step": 7469, "task_loss": 0.08089584857225418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998543015135573, "compression_loss": 0.0, "distillation_loss": 0.045823805034160614, "epoch": 7.09, "learning_rate": 4.997993890882008e-05, "loss": 0.0499, "step": 7470, "task_loss": 0.08696474879980087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998547587260468, "compression_loss": 0.0, "distillation_loss": 0.03767596185207367, "epoch": 7.09, "learning_rate": 4.997950986533656e-05, "loss": 0.0433, "step": 7471, "task_loss": 0.0938701331615448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998552149810236, "compression_loss": 0.0, "distillation_loss": 0.040337882936000824, "epoch": 7.1, "learning_rate": 4.997907628420477e-05, "loss": 0.0532, "step": 7472, "task_loss": 0.16945821046829224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998556702794914, "compression_loss": 0.0, "distillation_loss": 0.0397014319896698, "epoch": 7.1, "learning_rate": 4.9978638165503475e-05, "loss": 0.0405, "step": 7473, "task_loss": 0.04727710038423538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998561246224538, "compression_loss": 0.0, "distillation_loss": 0.04514710605144501, "epoch": 7.1, "learning_rate": 4.9978195509312266e-05, "loss": 0.061, "step": 7474, "task_loss": 0.20394141972064972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998565780109146, "compression_loss": 0.0, "distillation_loss": 0.13967446982860565, "epoch": 7.1, "learning_rate": 4.997774831571154e-05, "loss": 0.143, "step": 7475, "task_loss": 0.17266049981117249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998570304458774, "compression_loss": 0.0, "distillation_loss": 0.043175503611564636, "epoch": 7.1, "learning_rate": 4.9977296584782544e-05, "loss": 0.0402, "step": 7476, "task_loss": 0.013554053381085396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998574819283458, "compression_loss": 0.0, "distillation_loss": 0.024114977568387985, "epoch": 7.1, "learning_rate": 4.997684031660732e-05, "loss": 0.0293, "step": 7477, "task_loss": 0.07583250850439072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998579324593237, "compression_loss": 0.0, "distillation_loss": 0.04103930667042732, "epoch": 7.1, "learning_rate": 4.997637951126877e-05, "loss": 0.0476, "step": 7478, "task_loss": 0.1067778468132019 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998583820398146, "compression_loss": 0.0, "distillation_loss": 0.10217143595218658, "epoch": 7.1, "learning_rate": 4.997591416885059e-05, "loss": 0.1005, "step": 7479, "task_loss": 0.08553251624107361 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998588306708222, "compression_loss": 0.0, "distillation_loss": 0.021832741796970367, "epoch": 7.1, "learning_rate": 4.997544428943732e-05, "loss": 0.0202, "step": 7480, "task_loss": 0.005031948909163475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998592783533502, "compression_loss": 0.0, "distillation_loss": 0.024123501032590866, "epoch": 7.1, "learning_rate": 4.997496987311431e-05, "loss": 0.0222, "step": 7481, "task_loss": 0.0046864766627550125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998597250884024, "compression_loss": 0.0, "distillation_loss": 0.01681506633758545, "epoch": 7.11, "learning_rate": 4.997449091996774e-05, "loss": 0.023, "step": 7482, "task_loss": 0.0789770558476448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998601708769824, "compression_loss": 0.0, "distillation_loss": 0.031241536140441895, "epoch": 7.11, "learning_rate": 4.9974007430084617e-05, "loss": 0.0407, "step": 7483, "task_loss": 0.12625262141227722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998606157200938, "compression_loss": 0.0, "distillation_loss": 0.05506356433033943, "epoch": 7.11, "learning_rate": 4.997351940355277e-05, "loss": 0.0503, "step": 7484, "task_loss": 0.0069837626069784164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998610596187403, "compression_loss": 0.0, "distillation_loss": 0.02426174283027649, "epoch": 7.11, "learning_rate": 4.997302684046085e-05, "loss": 0.0417, "step": 7485, "task_loss": 0.19815370440483093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998615025739257, "compression_loss": 0.0, "distillation_loss": 0.014478189870715141, "epoch": 7.11, "learning_rate": 4.997252974089833e-05, "loss": 0.0135, "step": 7486, "task_loss": 0.004255037754774094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998619445866537, "compression_loss": 0.0, "distillation_loss": 0.03041962906718254, "epoch": 7.11, "learning_rate": 4.997202810495551e-05, "loss": 0.0277, "step": 7487, "task_loss": 0.0034227408468723297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998623856579278, "compression_loss": 0.0, "distillation_loss": 0.04478137940168381, "epoch": 7.11, "learning_rate": 4.997152193272353e-05, "loss": 0.0427, "step": 7488, "task_loss": 0.024202093482017517 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998628257887518, "compression_loss": 0.0, "distillation_loss": 0.016431665048003197, "epoch": 7.11, "learning_rate": 4.9971011224294314e-05, "loss": 0.0153, "step": 7489, "task_loss": 0.004669001325964928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998632649801293, "compression_loss": 0.0, "distillation_loss": 0.042685117572546005, "epoch": 7.11, "learning_rate": 4.997049597976066e-05, "loss": 0.0468, "step": 7490, "task_loss": 0.0839599221944809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998637032330641, "compression_loss": 0.0, "distillation_loss": 0.038373369723558426, "epoch": 7.11, "learning_rate": 4.9969976199216144e-05, "loss": 0.0551, "step": 7491, "task_loss": 0.2055097371339798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998641405485598, "compression_loss": 0.0, "distillation_loss": 0.01765899360179901, "epoch": 7.11, "learning_rate": 4.9969451882755196e-05, "loss": 0.0163, "step": 7492, "task_loss": 0.0037982575595378876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998645769276201, "compression_loss": 0.0, "distillation_loss": 0.019907163456082344, "epoch": 7.12, "learning_rate": 4.996892303047306e-05, "loss": 0.0281, "step": 7493, "task_loss": 0.10134420543909073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998650123712487, "compression_loss": 0.0, "distillation_loss": 0.02686948888003826, "epoch": 7.12, "learning_rate": 4.996838964246581e-05, "loss": 0.0298, "step": 7494, "task_loss": 0.055808331817388535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998654468804494, "compression_loss": 0.0, "distillation_loss": 0.07542771100997925, "epoch": 7.12, "learning_rate": 4.996785171883032e-05, "loss": 0.0714, "step": 7495, "task_loss": 0.034990422427654266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998658804562255, "compression_loss": 0.0, "distillation_loss": 0.026508131995797157, "epoch": 7.12, "learning_rate": 4.996730925966433e-05, "loss": 0.0363, "step": 7496, "task_loss": 0.12431460618972778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998663130995811, "compression_loss": 0.0, "distillation_loss": 0.08525696396827698, "epoch": 7.12, "learning_rate": 4.996676226506636e-05, "loss": 0.0831, "step": 7497, "task_loss": 0.06366419792175293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998667448115198, "compression_loss": 0.0, "distillation_loss": 0.02573692798614502, "epoch": 7.12, "learning_rate": 4.9966210735135785e-05, "loss": 0.0237, "step": 7498, "task_loss": 0.005860496312379837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799867175593045, "compression_loss": 0.0, "distillation_loss": 0.10803981125354767, "epoch": 7.12, "learning_rate": 4.9965654669972794e-05, "loss": 0.1114, "step": 7499, "task_loss": 0.14213261008262634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998676054451607, "compression_loss": 0.0, "distillation_loss": 0.018904566764831543, "epoch": 7.12, "learning_rate": 4.99650940696784e-05, "loss": 0.0179, "step": 7500, "task_loss": 0.009208250790834427 }, { "epoch": 7.12, "eval_accuracy": 0.9036697247706422, "eval_loss": 0.4249095320701599, "eval_runtime": 18.0437, "eval_samples_per_second": 48.327, "eval_steps_per_second": 6.041, "step": 7500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998680343688704, "compression_loss": 0.0, "distillation_loss": 0.1428883969783783, "epoch": 7.12, "learning_rate": 4.996452893435442e-05, "loss": 0.1441, "step": 7501, "task_loss": 0.1551024615764618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799868462365178, "compression_loss": 0.0, "distillation_loss": 0.04754265025258064, "epoch": 7.12, "learning_rate": 4.9963959264103544e-05, "loss": 0.0454, "step": 7502, "task_loss": 0.026247184723615646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998688894350868, "compression_loss": 0.0, "distillation_loss": 0.04800819978117943, "epoch": 7.13, "learning_rate": 4.996338505902924e-05, "loss": 0.0622, "step": 7503, "task_loss": 0.18986263871192932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998693155796008, "compression_loss": 0.0, "distillation_loss": 0.08775762468576431, "epoch": 7.13, "learning_rate": 4.996280631923581e-05, "loss": 0.0996, "step": 7504, "task_loss": 0.20661881566047668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998697407997237, "compression_loss": 0.0, "distillation_loss": 0.05795777961611748, "epoch": 7.13, "learning_rate": 4.9962223044828396e-05, "loss": 0.0575, "step": 7505, "task_loss": 0.05344875901937485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998701650964589, "compression_loss": 0.0, "distillation_loss": 0.07826469838619232, "epoch": 7.13, "learning_rate": 4.9961635235912935e-05, "loss": 0.0739, "step": 7506, "task_loss": 0.03454684466123581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998705884708104, "compression_loss": 0.0, "distillation_loss": 0.016553938388824463, "epoch": 7.13, "learning_rate": 4.9961042892596225e-05, "loss": 0.0304, "step": 7507, "task_loss": 0.1547098606824875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998710109237817, "compression_loss": 0.0, "distillation_loss": 0.024404142051935196, "epoch": 7.13, "learning_rate": 4.996044601498586e-05, "loss": 0.0231, "step": 7508, "task_loss": 0.010925725102424622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998714324563765, "compression_loss": 0.0, "distillation_loss": 0.12660256028175354, "epoch": 7.13, "learning_rate": 4.995984460319026e-05, "loss": 0.1258, "step": 7509, "task_loss": 0.11870937794446945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998718530695985, "compression_loss": 0.0, "distillation_loss": 0.029197368770837784, "epoch": 7.13, "learning_rate": 4.995923865731869e-05, "loss": 0.0291, "step": 7510, "task_loss": 0.028612298890948296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998722727644514, "compression_loss": 0.0, "distillation_loss": 0.01047599595040083, "epoch": 7.13, "learning_rate": 4.9958628177481195e-05, "loss": 0.0097, "step": 7511, "task_loss": 0.002894926816225052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998726915419389, "compression_loss": 0.0, "distillation_loss": 0.13094131648540497, "epoch": 7.13, "learning_rate": 4.99580131637887e-05, "loss": 0.1189, "step": 7512, "task_loss": 0.011026578024029732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998731094030646, "compression_loss": 0.0, "distillation_loss": 0.054517075419425964, "epoch": 7.13, "learning_rate": 4.995739361635292e-05, "loss": 0.0499, "step": 7513, "task_loss": 0.008429093286395073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998735263488322, "compression_loss": 0.0, "distillation_loss": 0.057775288820266724, "epoch": 7.14, "learning_rate": 4.9956769535286385e-05, "loss": 0.0621, "step": 7514, "task_loss": 0.1013016402721405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998739423802455, "compression_loss": 0.0, "distillation_loss": 0.13451939821243286, "epoch": 7.14, "learning_rate": 4.9956140920702476e-05, "loss": 0.1393, "step": 7515, "task_loss": 0.1819526106119156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799874357498308, "compression_loss": 0.0, "distillation_loss": 0.039106979966163635, "epoch": 7.14, "learning_rate": 4.995550777271538e-05, "loss": 0.0363, "step": 7516, "task_loss": 0.010946827009320259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998747717040235, "compression_loss": 0.0, "distillation_loss": 0.03019588068127632, "epoch": 7.14, "learning_rate": 4.995487009144011e-05, "loss": 0.0277, "step": 7517, "task_loss": 0.005600154399871826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998751849983957, "compression_loss": 0.0, "distillation_loss": 0.1031942293047905, "epoch": 7.14, "learning_rate": 4.99542278769925e-05, "loss": 0.1217, "step": 7518, "task_loss": 0.28847843408584595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998755973824282, "compression_loss": 0.0, "distillation_loss": 0.03711920231580734, "epoch": 7.14, "learning_rate": 4.995358112948921e-05, "loss": 0.0388, "step": 7519, "task_loss": 0.053622905164957047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998760088571247, "compression_loss": 0.0, "distillation_loss": 0.036818671971559525, "epoch": 7.14, "learning_rate": 4.9952929849047734e-05, "loss": 0.034, "step": 7520, "task_loss": 0.008642930537462234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998764194234891, "compression_loss": 0.0, "distillation_loss": 0.040213510394096375, "epoch": 7.14, "learning_rate": 4.9952274035786385e-05, "loss": 0.0417, "step": 7521, "task_loss": 0.054693013429641724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998768290825247, "compression_loss": 0.0, "distillation_loss": 0.025412341579794884, "epoch": 7.14, "learning_rate": 4.9951613689824276e-05, "loss": 0.0302, "step": 7522, "task_loss": 0.07287220656871796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998772378352353, "compression_loss": 0.0, "distillation_loss": 0.052676744759082794, "epoch": 7.14, "learning_rate": 4.995094881128138e-05, "loss": 0.0828, "step": 7523, "task_loss": 0.3543989956378937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998776456826249, "compression_loss": 0.0, "distillation_loss": 0.04211168363690376, "epoch": 7.15, "learning_rate": 4.995027940027846e-05, "loss": 0.0418, "step": 7524, "task_loss": 0.038644563406705856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998780526256968, "compression_loss": 0.0, "distillation_loss": 0.0226961188018322, "epoch": 7.15, "learning_rate": 4.9949605456937135e-05, "loss": 0.0388, "step": 7525, "task_loss": 0.18341752886772156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998784586654548, "compression_loss": 0.0, "distillation_loss": 0.05215848237276077, "epoch": 7.15, "learning_rate": 4.994892698137981e-05, "loss": 0.0506, "step": 7526, "task_loss": 0.03646836429834366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998788638029026, "compression_loss": 0.0, "distillation_loss": 0.022788241505622864, "epoch": 7.15, "learning_rate": 4.9948243973729745e-05, "loss": 0.029, "step": 7527, "task_loss": 0.08468222618103027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998792680390439, "compression_loss": 0.0, "distillation_loss": 0.08569545298814774, "epoch": 7.15, "learning_rate": 4.994755643411101e-05, "loss": 0.0933, "step": 7528, "task_loss": 0.16126233339309692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998796713748824, "compression_loss": 0.0, "distillation_loss": 0.019816741347312927, "epoch": 7.15, "learning_rate": 4.9946864362648506e-05, "loss": 0.0305, "step": 7529, "task_loss": 0.1267254650592804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998800738114217, "compression_loss": 0.0, "distillation_loss": 0.17498815059661865, "epoch": 7.15, "learning_rate": 4.994616775946794e-05, "loss": 0.1664, "step": 7530, "task_loss": 0.08893725275993347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998804753496654, "compression_loss": 0.0, "distillation_loss": 0.022720731794834137, "epoch": 7.15, "learning_rate": 4.994546662469586e-05, "loss": 0.0207, "step": 7531, "task_loss": 0.002466036006808281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998808759906175, "compression_loss": 0.0, "distillation_loss": 0.045352548360824585, "epoch": 7.15, "learning_rate": 4.9944760958459624e-05, "loss": 0.0428, "step": 7532, "task_loss": 0.020112009719014168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998812757352814, "compression_loss": 0.0, "distillation_loss": 0.059900932013988495, "epoch": 7.15, "learning_rate": 4.994405076088743e-05, "loss": 0.063, "step": 7533, "task_loss": 0.0908510833978653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799881674584661, "compression_loss": 0.0, "distillation_loss": 0.06974376738071442, "epoch": 7.15, "learning_rate": 4.994333603210829e-05, "loss": 0.0684, "step": 7534, "task_loss": 0.05587990581989288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998820725397597, "compression_loss": 0.0, "distillation_loss": 0.058896493166685104, "epoch": 7.16, "learning_rate": 4.9942616772252016e-05, "loss": 0.0566, "step": 7535, "task_loss": 0.0360528826713562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998824696015814, "compression_loss": 0.0, "distillation_loss": 0.047747716307640076, "epoch": 7.16, "learning_rate": 4.994189298144929e-05, "loss": 0.0435, "step": 7536, "task_loss": 0.005159799009561539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998828657711297, "compression_loss": 0.0, "distillation_loss": 0.019652074202895164, "epoch": 7.16, "learning_rate": 4.994116465983158e-05, "loss": 0.0345, "step": 7537, "task_loss": 0.16844874620437622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998832610494083, "compression_loss": 0.0, "distillation_loss": 0.17571541666984558, "epoch": 7.16, "learning_rate": 4.99404318075312e-05, "loss": 0.1646, "step": 7538, "task_loss": 0.06422274559736252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998836554374209, "compression_loss": 0.0, "distillation_loss": 0.1369340866804123, "epoch": 7.16, "learning_rate": 4.993969442468125e-05, "loss": 0.1452, "step": 7539, "task_loss": 0.21913862228393555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998840489361712, "compression_loss": 0.0, "distillation_loss": 0.05932403728365898, "epoch": 7.16, "learning_rate": 4.993895251141571e-05, "loss": 0.0689, "step": 7540, "task_loss": 0.15557356178760529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998844415466628, "compression_loss": 0.0, "distillation_loss": 0.04140976071357727, "epoch": 7.16, "learning_rate": 4.9938206067869334e-05, "loss": 0.0454, "step": 7541, "task_loss": 0.0813070684671402 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998848332698995, "compression_loss": 0.0, "distillation_loss": 0.036964789032936096, "epoch": 7.16, "learning_rate": 4.993745509417772e-05, "loss": 0.0396, "step": 7542, "task_loss": 0.06300060451030731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998852241068848, "compression_loss": 0.0, "distillation_loss": 0.14604540169239044, "epoch": 7.16, "learning_rate": 4.9936699590477296e-05, "loss": 0.1421, "step": 7543, "task_loss": 0.10691849142313004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998856140586226, "compression_loss": 0.0, "distillation_loss": 0.10992075502872467, "epoch": 7.16, "learning_rate": 4.9935939556905295e-05, "loss": 0.1188, "step": 7544, "task_loss": 0.19906115531921387 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998860031261164, "compression_loss": 0.0, "distillation_loss": 0.014540485106408596, "epoch": 7.17, "learning_rate": 4.993517499359978e-05, "loss": 0.0135, "step": 7545, "task_loss": 0.003651769831776619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79988639131037, "compression_loss": 0.0, "distillation_loss": 0.06785355508327484, "epoch": 7.17, "learning_rate": 4.993440590069963e-05, "loss": 0.0756, "step": 7546, "task_loss": 0.1453818380832672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799886778612387, "compression_loss": 0.0, "distillation_loss": 0.08633054792881012, "epoch": 7.17, "learning_rate": 4.993363227834457e-05, "loss": 0.0958, "step": 7547, "task_loss": 0.18063369393348694 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998871650331711, "compression_loss": 0.0, "distillation_loss": 0.13422691822052002, "epoch": 7.17, "learning_rate": 4.9932854126675124e-05, "loss": 0.1375, "step": 7548, "task_loss": 0.16670459508895874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998875505737261, "compression_loss": 0.0, "distillation_loss": 0.0358545184135437, "epoch": 7.17, "learning_rate": 4.993207144583264e-05, "loss": 0.0333, "step": 7549, "task_loss": 0.009983271360397339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998879352350555, "compression_loss": 0.0, "distillation_loss": 0.03655195236206055, "epoch": 7.17, "learning_rate": 4.993128423595931e-05, "loss": 0.0333, "step": 7550, "task_loss": 0.0044655874371528625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799888319018163, "compression_loss": 0.0, "distillation_loss": 0.04761989042162895, "epoch": 7.17, "learning_rate": 4.9930492497198125e-05, "loss": 0.0541, "step": 7551, "task_loss": 0.11243405193090439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998887019240526, "compression_loss": 0.0, "distillation_loss": 0.04824792221188545, "epoch": 7.17, "learning_rate": 4.992969622969292e-05, "loss": 0.0455, "step": 7552, "task_loss": 0.020604906603693962 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998890839537275, "compression_loss": 0.0, "distillation_loss": 0.02487030252814293, "epoch": 7.17, "learning_rate": 4.992889543358832e-05, "loss": 0.0281, "step": 7553, "task_loss": 0.05736660212278366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998894651081917, "compression_loss": 0.0, "distillation_loss": 0.040249839425086975, "epoch": 7.17, "learning_rate": 4.9928090109029817e-05, "loss": 0.0528, "step": 7554, "task_loss": 0.16572295129299164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998898453884489, "compression_loss": 0.0, "distillation_loss": 0.08049503713846207, "epoch": 7.17, "learning_rate": 4.9927280256163686e-05, "loss": 0.0842, "step": 7555, "task_loss": 0.11797265708446503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998902247955025, "compression_loss": 0.0, "distillation_loss": 0.0269448421895504, "epoch": 7.18, "learning_rate": 4.992646587513705e-05, "loss": 0.0313, "step": 7556, "task_loss": 0.07026375085115433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998906033303564, "compression_loss": 0.0, "distillation_loss": 0.03366638720035553, "epoch": 7.18, "learning_rate": 4.9925646966097835e-05, "loss": 0.0313, "step": 7557, "task_loss": 0.010441986843943596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998909809940143, "compression_loss": 0.0, "distillation_loss": 0.015848444774746895, "epoch": 7.18, "learning_rate": 4.99248235291948e-05, "loss": 0.0309, "step": 7558, "task_loss": 0.16670146584510803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998913577874799, "compression_loss": 0.0, "distillation_loss": 0.05733553692698479, "epoch": 7.18, "learning_rate": 4.9923995564577544e-05, "loss": 0.0547, "step": 7559, "task_loss": 0.03086872212588787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998917337117567, "compression_loss": 0.0, "distillation_loss": 0.03503969684243202, "epoch": 7.18, "learning_rate": 4.992316307239645e-05, "loss": 0.0492, "step": 7560, "task_loss": 0.17629651725292206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998921087678484, "compression_loss": 0.0, "distillation_loss": 0.07000732421875, "epoch": 7.18, "learning_rate": 4.992232605280276e-05, "loss": 0.078, "step": 7561, "task_loss": 0.14994311332702637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799892482956759, "compression_loss": 0.0, "distillation_loss": 0.042889855802059174, "epoch": 7.18, "learning_rate": 4.992148450594851e-05, "loss": 0.039, "step": 7562, "task_loss": 0.0036024488508701324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998928562794918, "compression_loss": 0.0, "distillation_loss": 0.012830915860831738, "epoch": 7.18, "learning_rate": 4.9920638431986574e-05, "loss": 0.0119, "step": 7563, "task_loss": 0.0039144158363342285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998932287370506, "compression_loss": 0.0, "distillation_loss": 0.017170187085866928, "epoch": 7.18, "learning_rate": 4.991978783107065e-05, "loss": 0.0199, "step": 7564, "task_loss": 0.04400904104113579 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998936003304392, "compression_loss": 0.0, "distillation_loss": 0.019753381609916687, "epoch": 7.18, "learning_rate": 4.9918932703355256e-05, "loss": 0.0186, "step": 7565, "task_loss": 0.008108856156468391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998939710606612, "compression_loss": 0.0, "distillation_loss": 0.11725122481584549, "epoch": 7.19, "learning_rate": 4.991807304899572e-05, "loss": 0.1261, "step": 7566, "task_loss": 0.20566239953041077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998943409287202, "compression_loss": 0.0, "distillation_loss": 0.06889253109693527, "epoch": 7.19, "learning_rate": 4.991720886814821e-05, "loss": 0.0706, "step": 7567, "task_loss": 0.08627396821975708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998947099356201, "compression_loss": 0.0, "distillation_loss": 0.13849744200706482, "epoch": 7.19, "learning_rate": 4.99163401609697e-05, "loss": 0.1404, "step": 7568, "task_loss": 0.15727216005325317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998950780823644, "compression_loss": 0.0, "distillation_loss": 0.0490923747420311, "epoch": 7.19, "learning_rate": 4.991546692761801e-05, "loss": 0.047, "step": 7569, "task_loss": 0.028280524536967278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998954453699567, "compression_loss": 0.0, "distillation_loss": 0.0508795864880085, "epoch": 7.19, "learning_rate": 4.991458916825176e-05, "loss": 0.0486, "step": 7570, "task_loss": 0.027954697608947754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799895811799401, "compression_loss": 0.0, "distillation_loss": 0.023934636265039444, "epoch": 7.19, "learning_rate": 4.991370688303039e-05, "loss": 0.0246, "step": 7571, "task_loss": 0.030092671513557434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998961773717007, "compression_loss": 0.0, "distillation_loss": 0.016041746363043785, "epoch": 7.19, "learning_rate": 4.9912820072114185e-05, "loss": 0.0149, "step": 7572, "task_loss": 0.0041885413229465485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998965420878595, "compression_loss": 0.0, "distillation_loss": 0.051787007600069046, "epoch": 7.19, "learning_rate": 4.9911928735664224e-05, "loss": 0.0531, "step": 7573, "task_loss": 0.0652824267745018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998969059488813, "compression_loss": 0.0, "distillation_loss": 0.11679676175117493, "epoch": 7.19, "learning_rate": 4.991103287384244e-05, "loss": 0.1229, "step": 7574, "task_loss": 0.1777617633342743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998972689557696, "compression_loss": 0.0, "distillation_loss": 0.02287183701992035, "epoch": 7.19, "learning_rate": 4.9910132486811555e-05, "loss": 0.0216, "step": 7575, "task_loss": 0.009705502539873123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799897631109528, "compression_loss": 0.0, "distillation_loss": 0.07535935938358307, "epoch": 7.19, "learning_rate": 4.990922757473514e-05, "loss": 0.0712, "step": 7576, "task_loss": 0.034015409648418427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998979924111604, "compression_loss": 0.0, "distillation_loss": 0.03653284162282944, "epoch": 7.2, "learning_rate": 4.990831813777757e-05, "loss": 0.0336, "step": 7577, "task_loss": 0.006952434778213501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998983528616704, "compression_loss": 0.0, "distillation_loss": 0.06351464986801147, "epoch": 7.2, "learning_rate": 4.990740417610406e-05, "loss": 0.0599, "step": 7578, "task_loss": 0.02752096578478813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998987124620617, "compression_loss": 0.0, "distillation_loss": 0.019939813762903214, "epoch": 7.2, "learning_rate": 4.9906485689880613e-05, "loss": 0.0185, "step": 7579, "task_loss": 0.005348971113562584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998990712133379, "compression_loss": 0.0, "distillation_loss": 0.014425119385123253, "epoch": 7.2, "learning_rate": 4.9905562679274096e-05, "loss": 0.0141, "step": 7580, "task_loss": 0.011267106980085373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7998994291165028, "compression_loss": 0.0, "distillation_loss": 0.1040351465344429, "epoch": 7.2, "learning_rate": 4.9904635144452164e-05, "loss": 0.1014, "step": 7581, "task_loss": 0.07765233516693115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79989978617256, "compression_loss": 0.0, "distillation_loss": 0.049760229885578156, "epoch": 7.2, "learning_rate": 4.990370308558332e-05, "loss": 0.0527, "step": 7582, "task_loss": 0.07866282761096954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999001423825131, "compression_loss": 0.0, "distillation_loss": 0.014256338588893414, "epoch": 7.2, "learning_rate": 4.9902766502836874e-05, "loss": 0.0235, "step": 7583, "task_loss": 0.10717885941267014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999004977473658, "compression_loss": 0.0, "distillation_loss": 0.084557443857193, "epoch": 7.2, "learning_rate": 4.9901825396382965e-05, "loss": 0.0891, "step": 7584, "task_loss": 0.13022615015506744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799900852268122, "compression_loss": 0.0, "distillation_loss": 0.1039082333445549, "epoch": 7.2, "learning_rate": 4.990087976639254e-05, "loss": 0.1037, "step": 7585, "task_loss": 0.10136047005653381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999012059457853, "compression_loss": 0.0, "distillation_loss": 0.08127206563949585, "epoch": 7.2, "learning_rate": 4.989992961303738e-05, "loss": 0.0907, "step": 7586, "task_loss": 0.17568367719650269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999015587813593, "compression_loss": 0.0, "distillation_loss": 0.012685288675129414, "epoch": 7.21, "learning_rate": 4.989897493649008e-05, "loss": 0.0195, "step": 7587, "task_loss": 0.08097560703754425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999019107758476, "compression_loss": 0.0, "distillation_loss": 0.03525643050670624, "epoch": 7.21, "learning_rate": 4.989801573692408e-05, "loss": 0.0407, "step": 7588, "task_loss": 0.08939573168754578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799902261930254, "compression_loss": 0.0, "distillation_loss": 0.04309917986392975, "epoch": 7.21, "learning_rate": 4.989705201451361e-05, "loss": 0.0391, "step": 7589, "task_loss": 0.00266990065574646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999026122455822, "compression_loss": 0.0, "distillation_loss": 0.018025081604719162, "epoch": 7.21, "learning_rate": 4.989608376943373e-05, "loss": 0.0172, "step": 7590, "task_loss": 0.009299803525209427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999029617228359, "compression_loss": 0.0, "distillation_loss": 0.011557912454009056, "epoch": 7.21, "learning_rate": 4.9895111001860335e-05, "loss": 0.0108, "step": 7591, "task_loss": 0.004404161125421524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999033103630186, "compression_loss": 0.0, "distillation_loss": 0.021339278668165207, "epoch": 7.21, "learning_rate": 4.989413371197013e-05, "loss": 0.0201, "step": 7592, "task_loss": 0.008754091337323189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999036581671343, "compression_loss": 0.0, "distillation_loss": 0.08135688304901123, "epoch": 7.21, "learning_rate": 4.989315189994065e-05, "loss": 0.087, "step": 7593, "task_loss": 0.13778801262378693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999040051361864, "compression_loss": 0.0, "distillation_loss": 0.020718682557344437, "epoch": 7.21, "learning_rate": 4.9892165565950235e-05, "loss": 0.0198, "step": 7594, "task_loss": 0.011542653664946556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999043512711788, "compression_loss": 0.0, "distillation_loss": 0.029304485768079758, "epoch": 7.21, "learning_rate": 4.9891174710178054e-05, "loss": 0.0359, "step": 7595, "task_loss": 0.09544922411441803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999046965731149, "compression_loss": 0.0, "distillation_loss": 0.012081381864845753, "epoch": 7.21, "learning_rate": 4.9890179332804125e-05, "loss": 0.0112, "step": 7596, "task_loss": 0.003652777522802353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999050410429985, "compression_loss": 0.0, "distillation_loss": 0.032954856753349304, "epoch": 7.21, "learning_rate": 4.988917943400924e-05, "loss": 0.032, "step": 7597, "task_loss": 0.023814279586076736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999053846818335, "compression_loss": 0.0, "distillation_loss": 0.019010456278920174, "epoch": 7.22, "learning_rate": 4.988817501397505e-05, "loss": 0.0209, "step": 7598, "task_loss": 0.03756894916296005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999057274906234, "compression_loss": 0.0, "distillation_loss": 0.05136597529053688, "epoch": 7.22, "learning_rate": 4.9887166072884e-05, "loss": 0.0477, "step": 7599, "task_loss": 0.014553211629390717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999060694703718, "compression_loss": 0.0, "distillation_loss": 0.02315210923552513, "epoch": 7.22, "learning_rate": 4.988615261091938e-05, "loss": 0.0213, "step": 7600, "task_loss": 0.004539225250482559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999064106220825, "compression_loss": 0.0, "distillation_loss": 0.05613788589835167, "epoch": 7.22, "learning_rate": 4.9885134628265276e-05, "loss": 0.0686, "step": 7601, "task_loss": 0.1807435154914856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999067509467591, "compression_loss": 0.0, "distillation_loss": 0.017415829002857208, "epoch": 7.22, "learning_rate": 4.988411212510663e-05, "loss": 0.0263, "step": 7602, "task_loss": 0.10614414513111115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999070904454054, "compression_loss": 0.0, "distillation_loss": 0.02971852757036686, "epoch": 7.22, "learning_rate": 4.988308510162917e-05, "loss": 0.0275, "step": 7603, "task_loss": 0.007290884852409363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799907429119025, "compression_loss": 0.0, "distillation_loss": 0.09431088715791702, "epoch": 7.22, "learning_rate": 4.988205355801945e-05, "loss": 0.0992, "step": 7604, "task_loss": 0.14289163053035736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999077669686216, "compression_loss": 0.0, "distillation_loss": 0.11817564815282822, "epoch": 7.22, "learning_rate": 4.988101749446488e-05, "loss": 0.1131, "step": 7605, "task_loss": 0.06737690418958664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799908103995199, "compression_loss": 0.0, "distillation_loss": 0.05987474322319031, "epoch": 7.22, "learning_rate": 4.987997691115366e-05, "loss": 0.0599, "step": 7606, "task_loss": 0.060150373727083206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999084401997606, "compression_loss": 0.0, "distillation_loss": 0.1141742616891861, "epoch": 7.22, "learning_rate": 4.98789318082748e-05, "loss": 0.1098, "step": 7607, "task_loss": 0.07077798992395401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999087755833103, "compression_loss": 0.0, "distillation_loss": 0.024525757879018784, "epoch": 7.23, "learning_rate": 4.987788218601816e-05, "loss": 0.0302, "step": 7608, "task_loss": 0.08100198209285736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999091101468517, "compression_loss": 0.0, "distillation_loss": 0.040195684880018234, "epoch": 7.23, "learning_rate": 4.987682804457441e-05, "loss": 0.0451, "step": 7609, "task_loss": 0.08934106677770615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999094438913886, "compression_loss": 0.0, "distillation_loss": 0.044671397656202316, "epoch": 7.23, "learning_rate": 4.987576938413504e-05, "loss": 0.0412, "step": 7610, "task_loss": 0.009734295308589935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999097768179245, "compression_loss": 0.0, "distillation_loss": 0.017387911677360535, "epoch": 7.23, "learning_rate": 4.987470620489235e-05, "loss": 0.0164, "step": 7611, "task_loss": 0.007594836875796318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999101089274633, "compression_loss": 0.0, "distillation_loss": 0.04098183289170265, "epoch": 7.23, "learning_rate": 4.9873638507039486e-05, "loss": 0.0408, "step": 7612, "task_loss": 0.039046209305524826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999104402210084, "compression_loss": 0.0, "distillation_loss": 0.0328001007437706, "epoch": 7.23, "learning_rate": 4.987256629077039e-05, "loss": 0.0379, "step": 7613, "task_loss": 0.08381561934947968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999107706995638, "compression_loss": 0.0, "distillation_loss": 0.06902207434177399, "epoch": 7.23, "learning_rate": 4.987148955627985e-05, "loss": 0.0674, "step": 7614, "task_loss": 0.05302996188402176 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999111003641329, "compression_loss": 0.0, "distillation_loss": 0.015985846519470215, "epoch": 7.23, "learning_rate": 4.987040830376344e-05, "loss": 0.0202, "step": 7615, "task_loss": 0.05846152827143669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999114292157196, "compression_loss": 0.0, "distillation_loss": 0.04634019732475281, "epoch": 7.23, "learning_rate": 4.9869322533417596e-05, "loss": 0.0425, "step": 7616, "task_loss": 0.008089037612080574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999117572553275, "compression_loss": 0.0, "distillation_loss": 0.0537746362388134, "epoch": 7.23, "learning_rate": 4.9868232245439525e-05, "loss": 0.0524, "step": 7617, "task_loss": 0.039632245898246765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999120844839601, "compression_loss": 0.0, "distillation_loss": 0.05178552865982056, "epoch": 7.23, "learning_rate": 4.986713744002731e-05, "loss": 0.0538, "step": 7618, "task_loss": 0.07191041857004166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999124109026214, "compression_loss": 0.0, "distillation_loss": 0.025834370404481888, "epoch": 7.24, "learning_rate": 4.9866038117379824e-05, "loss": 0.0236, "step": 7619, "task_loss": 0.003722256049513817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999127365123149, "compression_loss": 0.0, "distillation_loss": 0.19725723564624786, "epoch": 7.24, "learning_rate": 4.986493427769675e-05, "loss": 0.1893, "step": 7620, "task_loss": 0.11761374771595001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999130613140443, "compression_loss": 0.0, "distillation_loss": 0.05075707659125328, "epoch": 7.24, "learning_rate": 4.986382592117861e-05, "loss": 0.0515, "step": 7621, "task_loss": 0.05864773318171501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999133853088133, "compression_loss": 0.0, "distillation_loss": 0.021583644673228264, "epoch": 7.24, "learning_rate": 4.986271304802675e-05, "loss": 0.0223, "step": 7622, "task_loss": 0.028752833604812622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999137084976256, "compression_loss": 0.0, "distillation_loss": 0.12262220680713654, "epoch": 7.24, "learning_rate": 4.986159565844333e-05, "loss": 0.1209, "step": 7623, "task_loss": 0.10493774712085724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999140308814848, "compression_loss": 0.0, "distillation_loss": 0.054139602929353714, "epoch": 7.24, "learning_rate": 4.986047375263131e-05, "loss": 0.0749, "step": 7624, "task_loss": 0.2614555060863495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999143524613948, "compression_loss": 0.0, "distillation_loss": 0.011302494443953037, "epoch": 7.24, "learning_rate": 4.9859347330794515e-05, "loss": 0.0164, "step": 7625, "task_loss": 0.0621810182929039 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799914673238359, "compression_loss": 0.0, "distillation_loss": 0.019549107179045677, "epoch": 7.24, "learning_rate": 4.985821639313755e-05, "loss": 0.0179, "step": 7626, "task_loss": 0.003164045512676239 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999149932133811, "compression_loss": 0.0, "distillation_loss": 0.07022275030612946, "epoch": 7.24, "learning_rate": 4.985708093986586e-05, "loss": 0.0732, "step": 7627, "task_loss": 0.09962170571088791 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799915312387465, "compression_loss": 0.0, "distillation_loss": 0.042909909039735794, "epoch": 7.24, "learning_rate": 4.98559409711857e-05, "loss": 0.0394, "step": 7628, "task_loss": 0.007330624386668205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999156307616143, "compression_loss": 0.0, "distillation_loss": 0.043095022439956665, "epoch": 7.25, "learning_rate": 4.985479648730416e-05, "loss": 0.0452, "step": 7629, "task_loss": 0.06463564932346344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999159483368327, "compression_loss": 0.0, "distillation_loss": 0.05689408257603645, "epoch": 7.25, "learning_rate": 4.985364748842914e-05, "loss": 0.0657, "step": 7630, "task_loss": 0.14448405802249908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999162651141237, "compression_loss": 0.0, "distillation_loss": 0.09874869883060455, "epoch": 7.25, "learning_rate": 4.985249397476934e-05, "loss": 0.103, "step": 7631, "task_loss": 0.14085200428962708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999165810944913, "compression_loss": 0.0, "distillation_loss": 0.10595101863145828, "epoch": 7.25, "learning_rate": 4.985133594653434e-05, "loss": 0.1023, "step": 7632, "task_loss": 0.06989569962024689 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999168962789388, "compression_loss": 0.0, "distillation_loss": 0.11988788843154907, "epoch": 7.25, "learning_rate": 4.9850173403934466e-05, "loss": 0.1167, "step": 7633, "task_loss": 0.08802229911088943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999172106684702, "compression_loss": 0.0, "distillation_loss": 0.026174481958150864, "epoch": 7.25, "learning_rate": 4.9849006347180915e-05, "loss": 0.0297, "step": 7634, "task_loss": 0.06151473894715309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799917524264089, "compression_loss": 0.0, "distillation_loss": 0.02765459194779396, "epoch": 7.25, "learning_rate": 4.9847834776485694e-05, "loss": 0.0379, "step": 7635, "task_loss": 0.12964758276939392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999178370667991, "compression_loss": 0.0, "distillation_loss": 0.02943515032529831, "epoch": 7.25, "learning_rate": 4.984665869206161e-05, "loss": 0.0346, "step": 7636, "task_loss": 0.08072115480899811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999181490776038, "compression_loss": 0.0, "distillation_loss": 0.09545106440782547, "epoch": 7.25, "learning_rate": 4.984547809412231e-05, "loss": 0.0933, "step": 7637, "task_loss": 0.07358794659376144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999184602975071, "compression_loss": 0.0, "distillation_loss": 0.030496662482619286, "epoch": 7.25, "learning_rate": 4.984429298288227e-05, "loss": 0.0326, "step": 7638, "task_loss": 0.051075611263513565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999187707275127, "compression_loss": 0.0, "distillation_loss": 0.07517765462398529, "epoch": 7.25, "learning_rate": 4.984310335855674e-05, "loss": 0.0973, "step": 7639, "task_loss": 0.29597049951553345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799919080368624, "compression_loss": 0.0, "distillation_loss": 0.06372272223234177, "epoch": 7.26, "learning_rate": 4.9841909221361855e-05, "loss": 0.0631, "step": 7640, "task_loss": 0.05780046060681343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799919389221845, "compression_loss": 0.0, "distillation_loss": 0.041298843920230865, "epoch": 7.26, "learning_rate": 4.9840710571514515e-05, "loss": 0.0489, "step": 7641, "task_loss": 0.1176719143986702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999196972881792, "compression_loss": 0.0, "distillation_loss": 0.027051087468862534, "epoch": 7.26, "learning_rate": 4.9839507409232464e-05, "loss": 0.0249, "step": 7642, "task_loss": 0.005155773833394051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999200045686303, "compression_loss": 0.0, "distillation_loss": 0.08592119812965393, "epoch": 7.26, "learning_rate": 4.983829973473426e-05, "loss": 0.0866, "step": 7643, "task_loss": 0.09253047406673431 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799920311064202, "compression_loss": 0.0, "distillation_loss": 0.04047606140375137, "epoch": 7.26, "learning_rate": 4.983708754823929e-05, "loss": 0.0374, "step": 7644, "task_loss": 0.009331891313195229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799920616775898, "compression_loss": 0.0, "distillation_loss": 0.058635760098695755, "epoch": 7.26, "learning_rate": 4.983587084996776e-05, "loss": 0.0624, "step": 7645, "task_loss": 0.09588063508272171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799920921704722, "compression_loss": 0.0, "distillation_loss": 0.20887014269828796, "epoch": 7.26, "learning_rate": 4.9834649640140664e-05, "loss": 0.1998, "step": 7646, "task_loss": 0.11826451122760773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999212258516776, "compression_loss": 0.0, "distillation_loss": 0.0295681431889534, "epoch": 7.26, "learning_rate": 4.9833423918979864e-05, "loss": 0.0274, "step": 7647, "task_loss": 0.00763612799346447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999215292177686, "compression_loss": 0.0, "distillation_loss": 0.03998691588640213, "epoch": 7.26, "learning_rate": 4.983219368670801e-05, "loss": 0.0477, "step": 7648, "task_loss": 0.117280974984169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999218318039986, "compression_loss": 0.0, "distillation_loss": 0.04406122863292694, "epoch": 7.26, "learning_rate": 4.983095894354858e-05, "loss": 0.0483, "step": 7649, "task_loss": 0.08681491017341614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999221336113712, "compression_loss": 0.0, "distillation_loss": 0.02541101723909378, "epoch": 7.26, "learning_rate": 4.9829719689725865e-05, "loss": 0.0278, "step": 7650, "task_loss": 0.04967557266354561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999224346408902, "compression_loss": 0.0, "distillation_loss": 0.01671786792576313, "epoch": 7.27, "learning_rate": 4.982847592546499e-05, "loss": 0.0154, "step": 7651, "task_loss": 0.0038224849849939346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999227348935593, "compression_loss": 0.0, "distillation_loss": 0.03665540739893913, "epoch": 7.27, "learning_rate": 4.982722765099189e-05, "loss": 0.0429, "step": 7652, "task_loss": 0.09867965430021286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999230343703821, "compression_loss": 0.0, "distillation_loss": 0.044541992247104645, "epoch": 7.27, "learning_rate": 4.982597486653332e-05, "loss": 0.0519, "step": 7653, "task_loss": 0.11796452105045319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999233330723624, "compression_loss": 0.0, "distillation_loss": 0.030439257621765137, "epoch": 7.27, "learning_rate": 4.982471757231685e-05, "loss": 0.0352, "step": 7654, "task_loss": 0.07802172005176544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999236310005038, "compression_loss": 0.0, "distillation_loss": 0.0167954470962286, "epoch": 7.27, "learning_rate": 4.982345576857087e-05, "loss": 0.0169, "step": 7655, "task_loss": 0.017974717542529106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999239281558099, "compression_loss": 0.0, "distillation_loss": 0.07342584431171417, "epoch": 7.27, "learning_rate": 4.9822189455524604e-05, "loss": 0.0754, "step": 7656, "task_loss": 0.09314364194869995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999242245392846, "compression_loss": 0.0, "distillation_loss": 0.1788443773984909, "epoch": 7.27, "learning_rate": 4.982091863340808e-05, "loss": 0.1738, "step": 7657, "task_loss": 0.12857407331466675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999245201519313, "compression_loss": 0.0, "distillation_loss": 0.12496727705001831, "epoch": 7.27, "learning_rate": 4.9819643302452146e-05, "loss": 0.1192, "step": 7658, "task_loss": 0.06772095710039139 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999248149947539, "compression_loss": 0.0, "distillation_loss": 0.03145987540483475, "epoch": 7.27, "learning_rate": 4.981836346288847e-05, "loss": 0.029, "step": 7659, "task_loss": 0.00681840255856514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799925109068756, "compression_loss": 0.0, "distillation_loss": 0.010385725647211075, "epoch": 7.27, "learning_rate": 4.981707911494955e-05, "loss": 0.0098, "step": 7660, "task_loss": 0.004181254655122757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999254023749414, "compression_loss": 0.0, "distillation_loss": 0.011662168428301811, "epoch": 7.28, "learning_rate": 4.981579025886868e-05, "loss": 0.0109, "step": 7661, "task_loss": 0.0035572052001953125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999256949143135, "compression_loss": 0.0, "distillation_loss": 0.02936125546693802, "epoch": 7.28, "learning_rate": 4.981449689488e-05, "loss": 0.0453, "step": 7662, "task_loss": 0.18874679505825043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999259866878764, "compression_loss": 0.0, "distillation_loss": 0.07481378316879272, "epoch": 7.28, "learning_rate": 4.981319902321846e-05, "loss": 0.072, "step": 7663, "task_loss": 0.04644451290369034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999262776966334, "compression_loss": 0.0, "distillation_loss": 0.10028335452079773, "epoch": 7.28, "learning_rate": 4.981189664411981e-05, "loss": 0.1078, "step": 7664, "task_loss": 0.17516379058361053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999265679415883, "compression_loss": 0.0, "distillation_loss": 0.02721228078007698, "epoch": 7.28, "learning_rate": 4.981058975782063e-05, "loss": 0.034, "step": 7665, "task_loss": 0.09542686492204666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999268574237448, "compression_loss": 0.0, "distillation_loss": 0.035428885370492935, "epoch": 7.28, "learning_rate": 4.9809278364558336e-05, "loss": 0.0378, "step": 7666, "task_loss": 0.05906771123409271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999271461441066, "compression_loss": 0.0, "distillation_loss": 0.04637853801250458, "epoch": 7.28, "learning_rate": 4.980796246457115e-05, "loss": 0.0482, "step": 7667, "task_loss": 0.06469672918319702 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999274341036775, "compression_loss": 0.0, "distillation_loss": 0.060008976608514786, "epoch": 7.28, "learning_rate": 4.9806642058098105e-05, "loss": 0.0557, "step": 7668, "task_loss": 0.016873404383659363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799927721303461, "compression_loss": 0.0, "distillation_loss": 0.02527848817408085, "epoch": 7.28, "learning_rate": 4.980531714537905e-05, "loss": 0.0239, "step": 7669, "task_loss": 0.011759728193283081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999280077444608, "compression_loss": 0.0, "distillation_loss": 0.021292556077241898, "epoch": 7.28, "learning_rate": 4.980398772665468e-05, "loss": 0.0197, "step": 7670, "task_loss": 0.005023036152124405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999282934276807, "compression_loss": 0.0, "distillation_loss": 0.028033584356307983, "epoch": 7.28, "learning_rate": 4.980265380216649e-05, "loss": 0.0286, "step": 7671, "task_loss": 0.03369821235537529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999285783541241, "compression_loss": 0.0, "distillation_loss": 0.028027424588799477, "epoch": 7.29, "learning_rate": 4.9801315372156775e-05, "loss": 0.0488, "step": 7672, "task_loss": 0.23589852452278137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999288625247951, "compression_loss": 0.0, "distillation_loss": 0.048800766468048096, "epoch": 7.29, "learning_rate": 4.979997243686868e-05, "loss": 0.0462, "step": 7673, "task_loss": 0.022747982293367386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999291459406971, "compression_loss": 0.0, "distillation_loss": 0.040504857897758484, "epoch": 7.29, "learning_rate": 4.979862499654615e-05, "loss": 0.0495, "step": 7674, "task_loss": 0.13015000522136688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999294286028338, "compression_loss": 0.0, "distillation_loss": 0.017775043845176697, "epoch": 7.29, "learning_rate": 4.9797273051433966e-05, "loss": 0.0164, "step": 7675, "task_loss": 0.004127055406570435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799929710512209, "compression_loss": 0.0, "distillation_loss": 0.13806067407131195, "epoch": 7.29, "learning_rate": 4.97959166017777e-05, "loss": 0.1467, "step": 7676, "task_loss": 0.2246706783771515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999299916698263, "compression_loss": 0.0, "distillation_loss": 0.05745643004775047, "epoch": 7.29, "learning_rate": 4.979455564782377e-05, "loss": 0.0535, "step": 7677, "task_loss": 0.017686685547232628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999302720766893, "compression_loss": 0.0, "distillation_loss": 0.07123734056949615, "epoch": 7.29, "learning_rate": 4.9793190189819395e-05, "loss": 0.0862, "step": 7678, "task_loss": 0.22124677896499634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999305517338018, "compression_loss": 0.0, "distillation_loss": 0.01819181814789772, "epoch": 7.29, "learning_rate": 4.979182022801262e-05, "loss": 0.0174, "step": 7679, "task_loss": 0.01074174977838993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999308306421675, "compression_loss": 0.0, "distillation_loss": 0.045470044016838074, "epoch": 7.29, "learning_rate": 4.979044576265229e-05, "loss": 0.0435, "step": 7680, "task_loss": 0.025997349992394447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999311088027901, "compression_loss": 0.0, "distillation_loss": 0.027925997972488403, "epoch": 7.29, "learning_rate": 4.9789066793988106e-05, "loss": 0.0256, "step": 7681, "task_loss": 0.004869425669312477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999313862166731, "compression_loss": 0.0, "distillation_loss": 0.1813095510005951, "epoch": 7.3, "learning_rate": 4.978768332227054e-05, "loss": 0.1863, "step": 7682, "task_loss": 0.23134742677211761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999316628848204, "compression_loss": 0.0, "distillation_loss": 0.015448532067239285, "epoch": 7.3, "learning_rate": 4.9786295347750936e-05, "loss": 0.0143, "step": 7683, "task_loss": 0.00408821739256382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999319388082355, "compression_loss": 0.0, "distillation_loss": 0.0803908184170723, "epoch": 7.3, "learning_rate": 4.9784902870681406e-05, "loss": 0.0909, "step": 7684, "task_loss": 0.18548454344272614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999322139879224, "compression_loss": 0.0, "distillation_loss": 0.060036517679691315, "epoch": 7.3, "learning_rate": 4.97835058913149e-05, "loss": 0.0655, "step": 7685, "task_loss": 0.11482767760753632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999324884248843, "compression_loss": 0.0, "distillation_loss": 0.027849406003952026, "epoch": 7.3, "learning_rate": 4.9782104409905186e-05, "loss": 0.0506, "step": 7686, "task_loss": 0.2550583779811859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999327621201252, "compression_loss": 0.0, "distillation_loss": 0.05457767844200134, "epoch": 7.3, "learning_rate": 4.9780698426706864e-05, "loss": 0.0507, "step": 7687, "task_loss": 0.016233546659350395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999330350746487, "compression_loss": 0.0, "distillation_loss": 0.09315378218889236, "epoch": 7.3, "learning_rate": 4.977928794197532e-05, "loss": 0.0893, "step": 7688, "task_loss": 0.05484301596879959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999333072894586, "compression_loss": 0.0, "distillation_loss": 0.030269654467701912, "epoch": 7.3, "learning_rate": 4.9777872955966785e-05, "loss": 0.0351, "step": 7689, "task_loss": 0.0781378373503685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999335787655584, "compression_loss": 0.0, "distillation_loss": 0.05370612442493439, "epoch": 7.3, "learning_rate": 4.97764534689383e-05, "loss": 0.0585, "step": 7690, "task_loss": 0.101178377866745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999338495039519, "compression_loss": 0.0, "distillation_loss": 0.07610142230987549, "epoch": 7.3, "learning_rate": 4.977502948114772e-05, "loss": 0.0748, "step": 7691, "task_loss": 0.06338108330965042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999341195056426, "compression_loss": 0.0, "distillation_loss": 0.1321769803762436, "epoch": 7.3, "learning_rate": 4.977360099285371e-05, "loss": 0.1249, "step": 7692, "task_loss": 0.05932076275348663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999343887716345, "compression_loss": 0.0, "distillation_loss": 0.02152719907462597, "epoch": 7.31, "learning_rate": 4.9772168004315765e-05, "loss": 0.0272, "step": 7693, "task_loss": 0.07799746096134186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799934657302931, "compression_loss": 0.0, "distillation_loss": 0.021207425743341446, "epoch": 7.31, "learning_rate": 4.9770730515794204e-05, "loss": 0.0265, "step": 7694, "task_loss": 0.07455786317586899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999349251005359, "compression_loss": 0.0, "distillation_loss": 0.0122305266559124, "epoch": 7.31, "learning_rate": 4.976928852755015e-05, "loss": 0.0193, "step": 7695, "task_loss": 0.08255228400230408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799935192165453, "compression_loss": 0.0, "distillation_loss": 0.016809673979878426, "epoch": 7.31, "learning_rate": 4.976784203984554e-05, "loss": 0.016, "step": 7696, "task_loss": 0.008326346054673195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999354584986857, "compression_loss": 0.0, "distillation_loss": 0.04126199334859848, "epoch": 7.31, "learning_rate": 4.976639105294314e-05, "loss": 0.0384, "step": 7697, "task_loss": 0.012333398684859276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999357241012379, "compression_loss": 0.0, "distillation_loss": 0.1654108762741089, "epoch": 7.31, "learning_rate": 4.976493556710653e-05, "loss": 0.1673, "step": 7698, "task_loss": 0.18462178111076355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999359889741132, "compression_loss": 0.0, "distillation_loss": 0.022537576034665108, "epoch": 7.31, "learning_rate": 4.976347558260011e-05, "loss": 0.0206, "step": 7699, "task_loss": 0.00269523449242115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999362531183153, "compression_loss": 0.0, "distillation_loss": 0.04379589483141899, "epoch": 7.31, "learning_rate": 4.976201109968908e-05, "loss": 0.0441, "step": 7700, "task_loss": 0.04727374017238617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999365165348479, "compression_loss": 0.0, "distillation_loss": 0.06073600426316261, "epoch": 7.31, "learning_rate": 4.976054211863949e-05, "loss": 0.0607, "step": 7701, "task_loss": 0.05995677784085274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999367792247146, "compression_loss": 0.0, "distillation_loss": 0.03459569066762924, "epoch": 7.31, "learning_rate": 4.9759068639718166e-05, "loss": 0.0339, "step": 7702, "task_loss": 0.027450790628790855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999370411889193, "compression_loss": 0.0, "distillation_loss": 0.0753156989812851, "epoch": 7.32, "learning_rate": 4.975759066319278e-05, "loss": 0.0687, "step": 7703, "task_loss": 0.00872378796339035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999373024284654, "compression_loss": 0.0, "distillation_loss": 0.029609953984618187, "epoch": 7.32, "learning_rate": 4.9756108189331825e-05, "loss": 0.0271, "step": 7704, "task_loss": 0.004311494529247284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999375629443568, "compression_loss": 0.0, "distillation_loss": 0.0182811226695776, "epoch": 7.32, "learning_rate": 4.975462121840458e-05, "loss": 0.0215, "step": 7705, "task_loss": 0.05089661478996277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999378227375971, "compression_loss": 0.0, "distillation_loss": 0.027917640283703804, "epoch": 7.32, "learning_rate": 4.975312975068118e-05, "loss": 0.0381, "step": 7706, "task_loss": 0.1298721432685852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999380818091898, "compression_loss": 0.0, "distillation_loss": 0.06191258504986763, "epoch": 7.32, "learning_rate": 4.975163378643255e-05, "loss": 0.058, "step": 7707, "task_loss": 0.02266554906964302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999383401601389, "compression_loss": 0.0, "distillation_loss": 0.02043573185801506, "epoch": 7.32, "learning_rate": 4.975013332593044e-05, "loss": 0.021, "step": 7708, "task_loss": 0.026547657325863838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999385977914479, "compression_loss": 0.0, "distillation_loss": 0.018625617027282715, "epoch": 7.32, "learning_rate": 4.97486283694474e-05, "loss": 0.0172, "step": 7709, "task_loss": 0.004806183278560638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999388547041205, "compression_loss": 0.0, "distillation_loss": 0.11342775821685791, "epoch": 7.32, "learning_rate": 4.974711891725684e-05, "loss": 0.1113, "step": 7710, "task_loss": 0.09240047633647919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999391108991605, "compression_loss": 0.0, "distillation_loss": 0.025932233780622482, "epoch": 7.32, "learning_rate": 4.9745604969632934e-05, "loss": 0.0389, "step": 7711, "task_loss": 0.15590538084506989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999393663775713, "compression_loss": 0.0, "distillation_loss": 0.15824320912361145, "epoch": 7.32, "learning_rate": 4.974408652685072e-05, "loss": 0.1603, "step": 7712, "task_loss": 0.1786690652370453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799939621140357, "compression_loss": 0.0, "distillation_loss": 0.1327803134918213, "epoch": 7.32, "learning_rate": 4.974256358918601e-05, "loss": 0.1301, "step": 7713, "task_loss": 0.1060781478881836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999398751885209, "compression_loss": 0.0, "distillation_loss": 0.024752333760261536, "epoch": 7.33, "learning_rate": 4.9741036156915464e-05, "loss": 0.0296, "step": 7714, "task_loss": 0.07322291284799576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799940128523067, "compression_loss": 0.0, "distillation_loss": 0.04586133733391762, "epoch": 7.33, "learning_rate": 4.973950423031655e-05, "loss": 0.0429, "step": 7715, "task_loss": 0.016736658290028572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999403811449985, "compression_loss": 0.0, "distillation_loss": 0.014082120731472969, "epoch": 7.33, "learning_rate": 4.9737967809667546e-05, "loss": 0.0198, "step": 7716, "task_loss": 0.07149508595466614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999406330553197, "compression_loss": 0.0, "distillation_loss": 0.029716944321990013, "epoch": 7.33, "learning_rate": 4.9736426895247545e-05, "loss": 0.0355, "step": 7717, "task_loss": 0.08792907744646072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999408842550338, "compression_loss": 0.0, "distillation_loss": 0.012465763837099075, "epoch": 7.33, "learning_rate": 4.973488148733647e-05, "loss": 0.0118, "step": 7718, "task_loss": 0.005948711186647415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999411347451447, "compression_loss": 0.0, "distillation_loss": 0.028147976845502853, "epoch": 7.33, "learning_rate": 4.973333158621505e-05, "loss": 0.0256, "step": 7719, "task_loss": 0.0030502378940582275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999413845266561, "compression_loss": 0.0, "distillation_loss": 0.26714271306991577, "epoch": 7.33, "learning_rate": 4.973177719216483e-05, "loss": 0.2669, "step": 7720, "task_loss": 0.26459577679634094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999416336005716, "compression_loss": 0.0, "distillation_loss": 0.0716482624411583, "epoch": 7.33, "learning_rate": 4.973021830546817e-05, "loss": 0.0705, "step": 7721, "task_loss": 0.06003544479608536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999418819678948, "compression_loss": 0.0, "distillation_loss": 0.03609579801559448, "epoch": 7.33, "learning_rate": 4.972865492640826e-05, "loss": 0.0456, "step": 7722, "task_loss": 0.13090340793132782 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999421296296296, "compression_loss": 0.0, "distillation_loss": 0.09618473052978516, "epoch": 7.33, "learning_rate": 4.972708705526908e-05, "loss": 0.1099, "step": 7723, "task_loss": 0.23324905335903168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999423765867796, "compression_loss": 0.0, "distillation_loss": 0.04280245676636696, "epoch": 7.34, "learning_rate": 4.972551469233545e-05, "loss": 0.0415, "step": 7724, "task_loss": 0.029460199177265167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999426228403484, "compression_loss": 0.0, "distillation_loss": 0.2212740182876587, "epoch": 7.34, "learning_rate": 4.9723937837892996e-05, "loss": 0.2304, "step": 7725, "task_loss": 0.3125489354133606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999428683913398, "compression_loss": 0.0, "distillation_loss": 0.04743258282542229, "epoch": 7.34, "learning_rate": 4.972235649222817e-05, "loss": 0.0538, "step": 7726, "task_loss": 0.11064188182353973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999431132407573, "compression_loss": 0.0, "distillation_loss": 0.04925276339054108, "epoch": 7.34, "learning_rate": 4.972077065562821e-05, "loss": 0.0582, "step": 7727, "task_loss": 0.13823464512825012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999433573896048, "compression_loss": 0.0, "distillation_loss": 0.05824163183569908, "epoch": 7.34, "learning_rate": 4.971918032838122e-05, "loss": 0.0562, "step": 7728, "task_loss": 0.03779482841491699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999436008388858, "compression_loss": 0.0, "distillation_loss": 0.08110888302326202, "epoch": 7.34, "learning_rate": 4.9717585510776065e-05, "loss": 0.08, "step": 7729, "task_loss": 0.06993526965379715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999438435896041, "compression_loss": 0.0, "distillation_loss": 0.026565806940197945, "epoch": 7.34, "learning_rate": 4.971598620310246e-05, "loss": 0.0247, "step": 7730, "task_loss": 0.007519997656345367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999440856427634, "compression_loss": 0.0, "distillation_loss": 0.03230992704629898, "epoch": 7.34, "learning_rate": 4.9714382405650926e-05, "loss": 0.0364, "step": 7731, "task_loss": 0.07325948029756546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999443269993672, "compression_loss": 0.0, "distillation_loss": 0.020386017858982086, "epoch": 7.34, "learning_rate": 4.971277411871281e-05, "loss": 0.0193, "step": 7732, "task_loss": 0.009038899093866348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999445676604194, "compression_loss": 0.0, "distillation_loss": 0.07571500539779663, "epoch": 7.34, "learning_rate": 4.971116134258025e-05, "loss": 0.0778, "step": 7733, "task_loss": 0.09688396751880646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999448076269235, "compression_loss": 0.0, "distillation_loss": 0.051084429025650024, "epoch": 7.34, "learning_rate": 4.9709544077546235e-05, "loss": 0.0616, "step": 7734, "task_loss": 0.1558133214712143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999450468998834, "compression_loss": 0.0, "distillation_loss": 0.05314343795180321, "epoch": 7.35, "learning_rate": 4.9707922323904524e-05, "loss": 0.0731, "step": 7735, "task_loss": 0.25305381417274475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999452854803026, "compression_loss": 0.0, "distillation_loss": 0.02783917263150215, "epoch": 7.35, "learning_rate": 4.9706296081949724e-05, "loss": 0.0271, "step": 7736, "task_loss": 0.020391501486301422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999455233691849, "compression_loss": 0.0, "distillation_loss": 0.13770464062690735, "epoch": 7.35, "learning_rate": 4.9704665351977266e-05, "loss": 0.1352, "step": 7737, "task_loss": 0.11255469173192978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999457605675339, "compression_loss": 0.0, "distillation_loss": 0.06904464215040207, "epoch": 7.35, "learning_rate": 4.9703030134283356e-05, "loss": 0.063, "step": 7738, "task_loss": 0.008208906278014183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999459970763533, "compression_loss": 0.0, "distillation_loss": 0.14025121927261353, "epoch": 7.35, "learning_rate": 4.970139042916506e-05, "loss": 0.1517, "step": 7739, "task_loss": 0.25468742847442627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999462328966467, "compression_loss": 0.0, "distillation_loss": 0.014299793168902397, "epoch": 7.35, "learning_rate": 4.969974623692023e-05, "loss": 0.0224, "step": 7740, "task_loss": 0.09552035480737686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999464680294179, "compression_loss": 0.0, "distillation_loss": 0.050735875964164734, "epoch": 7.35, "learning_rate": 4.969809755784753e-05, "loss": 0.046, "step": 7741, "task_loss": 0.003554748371243477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999467024756706, "compression_loss": 0.0, "distillation_loss": 0.11840305477380753, "epoch": 7.35, "learning_rate": 4.969644439224647e-05, "loss": 0.1174, "step": 7742, "task_loss": 0.10836636275053024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999469362364083, "compression_loss": 0.0, "distillation_loss": 0.03614204376935959, "epoch": 7.35, "learning_rate": 4.969478674041735e-05, "loss": 0.0386, "step": 7743, "task_loss": 0.061211228370666504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799947169312635, "compression_loss": 0.0, "distillation_loss": 0.054497864097356796, "epoch": 7.35, "learning_rate": 4.969312460266128e-05, "loss": 0.0529, "step": 7744, "task_loss": 0.0389748215675354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799947401705354, "compression_loss": 0.0, "distillation_loss": 0.0671176165342331, "epoch": 7.36, "learning_rate": 4.969145797928021e-05, "loss": 0.0707, "step": 7745, "task_loss": 0.10274288058280945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999476334155693, "compression_loss": 0.0, "distillation_loss": 0.03849276900291443, "epoch": 7.36, "learning_rate": 4.968978687057687e-05, "loss": 0.0368, "step": 7746, "task_loss": 0.02152402326464653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999478644442845, "compression_loss": 0.0, "distillation_loss": 0.0317964106798172, "epoch": 7.36, "learning_rate": 4.9688111276854846e-05, "loss": 0.036, "step": 7747, "task_loss": 0.0738469660282135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999480947925032, "compression_loss": 0.0, "distillation_loss": 0.10060693323612213, "epoch": 7.36, "learning_rate": 4.9686431198418515e-05, "loss": 0.0981, "step": 7748, "task_loss": 0.07562766224145889 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799948324461229, "compression_loss": 0.0, "distillation_loss": 0.17574167251586914, "epoch": 7.36, "learning_rate": 4.968474663557306e-05, "loss": 0.1741, "step": 7749, "task_loss": 0.1597302258014679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999485534514659, "compression_loss": 0.0, "distillation_loss": 0.05716216191649437, "epoch": 7.36, "learning_rate": 4.9683057588624494e-05, "loss": 0.0604, "step": 7750, "task_loss": 0.08970184624195099 }, { "epoch": 7.36, "eval_accuracy": 0.8761467889908257, "eval_loss": 0.5915129780769348, "eval_runtime": 18.0311, "eval_samples_per_second": 48.361, "eval_steps_per_second": 6.045, "step": 7750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999487817642172, "compression_loss": 0.0, "distillation_loss": 0.0786730945110321, "epoch": 7.36, "learning_rate": 4.968136405787964e-05, "loss": 0.0764, "step": 7751, "task_loss": 0.05567537993192673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999490094004869, "compression_loss": 0.0, "distillation_loss": 0.12053617835044861, "epoch": 7.36, "learning_rate": 4.967966604364614e-05, "loss": 0.12, "step": 7752, "task_loss": 0.11506214737892151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999492363612786, "compression_loss": 0.0, "distillation_loss": 0.13453137874603271, "epoch": 7.36, "learning_rate": 4.9677963546232445e-05, "loss": 0.1363, "step": 7753, "task_loss": 0.152423694729805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999494626475958, "compression_loss": 0.0, "distillation_loss": 0.05290111154317856, "epoch": 7.36, "learning_rate": 4.967625656594782e-05, "loss": 0.0494, "step": 7754, "task_loss": 0.018385985866189003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999496882604423, "compression_loss": 0.0, "distillation_loss": 0.11135755479335785, "epoch": 7.36, "learning_rate": 4.967454510310235e-05, "loss": 0.1068, "step": 7755, "task_loss": 0.06577824056148529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999499132008219, "compression_loss": 0.0, "distillation_loss": 0.03572801128029823, "epoch": 7.37, "learning_rate": 4.967282915800693e-05, "loss": 0.0332, "step": 7756, "task_loss": 0.01009390503168106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999501374697381, "compression_loss": 0.0, "distillation_loss": 0.13653673231601715, "epoch": 7.37, "learning_rate": 4.9671108730973274e-05, "loss": 0.1311, "step": 7757, "task_loss": 0.08257921040058136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999503610681947, "compression_loss": 0.0, "distillation_loss": 0.01905696839094162, "epoch": 7.37, "learning_rate": 4.9669383822313886e-05, "loss": 0.0178, "step": 7758, "task_loss": 0.006181072443723679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999505839971953, "compression_loss": 0.0, "distillation_loss": 0.051291391253471375, "epoch": 7.37, "learning_rate": 4.966765443234212e-05, "loss": 0.0508, "step": 7759, "task_loss": 0.04657955840229988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999508062577437, "compression_loss": 0.0, "distillation_loss": 0.025289995595812798, "epoch": 7.37, "learning_rate": 4.966592056137213e-05, "loss": 0.0322, "step": 7760, "task_loss": 0.09458184987306595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999510278508435, "compression_loss": 0.0, "distillation_loss": 0.022838320583105087, "epoch": 7.37, "learning_rate": 4.966418220971888e-05, "loss": 0.031, "step": 7761, "task_loss": 0.10419056564569473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999512487774983, "compression_loss": 0.0, "distillation_loss": 0.015206392854452133, "epoch": 7.37, "learning_rate": 4.9662439377698145e-05, "loss": 0.0172, "step": 7762, "task_loss": 0.03513802960515022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999514690387121, "compression_loss": 0.0, "distillation_loss": 0.07149576395750046, "epoch": 7.37, "learning_rate": 4.9660692065626515e-05, "loss": 0.0657, "step": 7763, "task_loss": 0.013111604377627373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999516886354882, "compression_loss": 0.0, "distillation_loss": 0.0190842617303133, "epoch": 7.37, "learning_rate": 4.965894027382141e-05, "loss": 0.0176, "step": 7764, "task_loss": 0.00447353720664978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999519075688304, "compression_loss": 0.0, "distillation_loss": 0.027284346520900726, "epoch": 7.37, "learning_rate": 4.965718400260105e-05, "loss": 0.0253, "step": 7765, "task_loss": 0.007609104737639427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999521258397424, "compression_loss": 0.0, "distillation_loss": 0.05418026074767113, "epoch": 7.38, "learning_rate": 4.965542325228446e-05, "loss": 0.0654, "step": 7766, "task_loss": 0.16616730391979218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999523434492279, "compression_loss": 0.0, "distillation_loss": 0.09797915816307068, "epoch": 7.38, "learning_rate": 4.96536580231915e-05, "loss": 0.106, "step": 7767, "task_loss": 0.17831069231033325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999525603982907, "compression_loss": 0.0, "distillation_loss": 0.09306447952985764, "epoch": 7.38, "learning_rate": 4.9651888315642815e-05, "loss": 0.0886, "step": 7768, "task_loss": 0.04870466887950897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999527766879343, "compression_loss": 0.0, "distillation_loss": 0.04752252995967865, "epoch": 7.38, "learning_rate": 4.96501141299599e-05, "loss": 0.0523, "step": 7769, "task_loss": 0.09561814367771149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999529923191625, "compression_loss": 0.0, "distillation_loss": 0.05140899494290352, "epoch": 7.38, "learning_rate": 4.9648335466465035e-05, "loss": 0.0529, "step": 7770, "task_loss": 0.06599454581737518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999532072929788, "compression_loss": 0.0, "distillation_loss": 0.049043748527765274, "epoch": 7.38, "learning_rate": 4.964655232548133e-05, "loss": 0.0707, "step": 7771, "task_loss": 0.2652056813240051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999534216103871, "compression_loss": 0.0, "distillation_loss": 0.08960321545600891, "epoch": 7.38, "learning_rate": 4.964476470733269e-05, "loss": 0.093, "step": 7772, "task_loss": 0.12396573275327682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799953635272391, "compression_loss": 0.0, "distillation_loss": 0.028217440471053123, "epoch": 7.38, "learning_rate": 4.964297261234385e-05, "loss": 0.0295, "step": 7773, "task_loss": 0.0411832258105278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999538482799942, "compression_loss": 0.0, "distillation_loss": 0.03329095244407654, "epoch": 7.38, "learning_rate": 4.964117604084036e-05, "loss": 0.0398, "step": 7774, "task_loss": 0.09877783805131912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999540606342003, "compression_loss": 0.0, "distillation_loss": 0.09195288270711899, "epoch": 7.38, "learning_rate": 4.963937499314857e-05, "loss": 0.1022, "step": 7775, "task_loss": 0.1942749172449112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799954272336013, "compression_loss": 0.0, "distillation_loss": 0.036124154925346375, "epoch": 7.38, "learning_rate": 4.963756946959564e-05, "loss": 0.0386, "step": 7776, "task_loss": 0.060537442564964294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799954483386436, "compression_loss": 0.0, "distillation_loss": 0.04166106879711151, "epoch": 7.39, "learning_rate": 4.9635759470509554e-05, "loss": 0.0392, "step": 7777, "task_loss": 0.017511041834950447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999546937864732, "compression_loss": 0.0, "distillation_loss": 0.03311231732368469, "epoch": 7.39, "learning_rate": 4.9633944996219125e-05, "loss": 0.0324, "step": 7778, "task_loss": 0.025931518524885178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799954903537128, "compression_loss": 0.0, "distillation_loss": 0.11695058643817902, "epoch": 7.39, "learning_rate": 4.9632126047053954e-05, "loss": 0.1216, "step": 7779, "task_loss": 0.1629902720451355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999551126394041, "compression_loss": 0.0, "distillation_loss": 0.12245595455169678, "epoch": 7.39, "learning_rate": 4.963030262334445e-05, "loss": 0.1158, "step": 7780, "task_loss": 0.05568038299679756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999553210943052, "compression_loss": 0.0, "distillation_loss": 0.027472712099552155, "epoch": 7.39, "learning_rate": 4.962847472542185e-05, "loss": 0.031, "step": 7781, "task_loss": 0.062434788793325424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999555289028352, "compression_loss": 0.0, "distillation_loss": 0.04194699972867966, "epoch": 7.39, "learning_rate": 4.96266423536182e-05, "loss": 0.0484, "step": 7782, "task_loss": 0.10626760870218277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999557360659975, "compression_loss": 0.0, "distillation_loss": 0.05600655823945999, "epoch": 7.39, "learning_rate": 4.9624805508266375e-05, "loss": 0.0603, "step": 7783, "task_loss": 0.09906087070703506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799955942584796, "compression_loss": 0.0, "distillation_loss": 0.03841715306043625, "epoch": 7.39, "learning_rate": 4.9622964189700026e-05, "loss": 0.0373, "step": 7784, "task_loss": 0.027386927977204323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999561484602341, "compression_loss": 0.0, "distillation_loss": 0.033997707068920135, "epoch": 7.39, "learning_rate": 4.962111839825365e-05, "loss": 0.0323, "step": 7785, "task_loss": 0.016779828816652298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999563536933157, "compression_loss": 0.0, "distillation_loss": 0.05404585599899292, "epoch": 7.39, "learning_rate": 4.961926813426254e-05, "loss": 0.0535, "step": 7786, "task_loss": 0.04876035451889038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999565582850446, "compression_loss": 0.0, "distillation_loss": 0.03144819289445877, "epoch": 7.4, "learning_rate": 4.9617413398062814e-05, "loss": 0.0358, "step": 7787, "task_loss": 0.07468868046998978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999567622364242, "compression_loss": 0.0, "distillation_loss": 0.09421627223491669, "epoch": 7.4, "learning_rate": 4.9615554189991374e-05, "loss": 0.0962, "step": 7788, "task_loss": 0.11395429074764252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999569655484584, "compression_loss": 0.0, "distillation_loss": 0.05254974216222763, "epoch": 7.4, "learning_rate": 4.9613690510385965e-05, "loss": 0.0524, "step": 7789, "task_loss": 0.051460277289152145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999571682221507, "compression_loss": 0.0, "distillation_loss": 0.1056189015507698, "epoch": 7.4, "learning_rate": 4.961182235958515e-05, "loss": 0.1065, "step": 7790, "task_loss": 0.11400679498910904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999573702585049, "compression_loss": 0.0, "distillation_loss": 0.04173080623149872, "epoch": 7.4, "learning_rate": 4.9609949737928254e-05, "loss": 0.0572, "step": 7791, "task_loss": 0.19677627086639404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999575716585247, "compression_loss": 0.0, "distillation_loss": 0.021544422954320908, "epoch": 7.4, "learning_rate": 4.9608072645755476e-05, "loss": 0.0198, "step": 7792, "task_loss": 0.004072500392794609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999577724232136, "compression_loss": 0.0, "distillation_loss": 0.06151081249117851, "epoch": 7.4, "learning_rate": 4.960619108340778e-05, "loss": 0.058, "step": 7793, "task_loss": 0.02649463526904583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999579725535755, "compression_loss": 0.0, "distillation_loss": 0.07832042872905731, "epoch": 7.4, "learning_rate": 4.9604305051226976e-05, "loss": 0.0742, "step": 7794, "task_loss": 0.03758959844708443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799958172050614, "compression_loss": 0.0, "distillation_loss": 0.058107852935791016, "epoch": 7.4, "learning_rate": 4.960241454955566e-05, "loss": 0.0527, "step": 7795, "task_loss": 0.004193047061562538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999583709153328, "compression_loss": 0.0, "distillation_loss": 0.025543441995978355, "epoch": 7.4, "learning_rate": 4.960051957873725e-05, "loss": 0.0301, "step": 7796, "task_loss": 0.07116261124610901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999585691487355, "compression_loss": 0.0, "distillation_loss": 0.09677424281835556, "epoch": 7.4, "learning_rate": 4.959862013911599e-05, "loss": 0.0951, "step": 7797, "task_loss": 0.07997578382492065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799958766751826, "compression_loss": 0.0, "distillation_loss": 0.03689539432525635, "epoch": 7.41, "learning_rate": 4.959671623103691e-05, "loss": 0.0423, "step": 7798, "task_loss": 0.09053385257720947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999589637256077, "compression_loss": 0.0, "distillation_loss": 0.09475536644458771, "epoch": 7.41, "learning_rate": 4.959480785484587e-05, "loss": 0.1033, "step": 7799, "task_loss": 0.17996615171432495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999591600710844, "compression_loss": 0.0, "distillation_loss": 0.10527956485748291, "epoch": 7.41, "learning_rate": 4.959289501088953e-05, "loss": 0.0973, "step": 7800, "task_loss": 0.025774430483579636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999593557892598, "compression_loss": 0.0, "distillation_loss": 0.04095172882080078, "epoch": 7.41, "learning_rate": 4.9590977699515374e-05, "loss": 0.0552, "step": 7801, "task_loss": 0.18330860137939453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999595508811376, "compression_loss": 0.0, "distillation_loss": 0.03950580954551697, "epoch": 7.41, "learning_rate": 4.958905592107168e-05, "loss": 0.0592, "step": 7802, "task_loss": 0.2363201081752777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999597453477214, "compression_loss": 0.0, "distillation_loss": 0.1426384150981903, "epoch": 7.41, "learning_rate": 4.958712967590756e-05, "loss": 0.1449, "step": 7803, "task_loss": 0.16487504541873932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999599391900151, "compression_loss": 0.0, "distillation_loss": 0.03035861626267433, "epoch": 7.41, "learning_rate": 4.9585198964372925e-05, "loss": 0.0333, "step": 7804, "task_loss": 0.060261476784944534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999601324090221, "compression_loss": 0.0, "distillation_loss": 0.0202895849943161, "epoch": 7.41, "learning_rate": 4.958326378681849e-05, "loss": 0.0284, "step": 7805, "task_loss": 0.10182420909404755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999603250057462, "compression_loss": 0.0, "distillation_loss": 0.04021252319216728, "epoch": 7.41, "learning_rate": 4.958132414359579e-05, "loss": 0.0507, "step": 7806, "task_loss": 0.14528432488441467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999605169811911, "compression_loss": 0.0, "distillation_loss": 0.14047583937644958, "epoch": 7.41, "learning_rate": 4.957938003505718e-05, "loss": 0.1381, "step": 7807, "task_loss": 0.1167716383934021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999607083363605, "compression_loss": 0.0, "distillation_loss": 0.06726957857608795, "epoch": 7.42, "learning_rate": 4.957743146155581e-05, "loss": 0.0725, "step": 7808, "task_loss": 0.11913690716028214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799960899072258, "compression_loss": 0.0, "distillation_loss": 0.02605987712740898, "epoch": 7.42, "learning_rate": 4.9575478423445655e-05, "loss": 0.024, "step": 7809, "task_loss": 0.005071789026260376 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999610891898873, "compression_loss": 0.0, "distillation_loss": 0.04089764878153801, "epoch": 7.42, "learning_rate": 4.957352092108148e-05, "loss": 0.0429, "step": 7810, "task_loss": 0.06113029643893242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999612786902521, "compression_loss": 0.0, "distillation_loss": 0.014122812077403069, "epoch": 7.42, "learning_rate": 4.957155895481889e-05, "loss": 0.0132, "step": 7811, "task_loss": 0.005167461931705475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999614675743562, "compression_loss": 0.0, "distillation_loss": 0.013877642340958118, "epoch": 7.42, "learning_rate": 4.956959252501426e-05, "loss": 0.0172, "step": 7812, "task_loss": 0.047237932682037354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999616558432031, "compression_loss": 0.0, "distillation_loss": 0.04274125397205353, "epoch": 7.42, "learning_rate": 4.956762163202484e-05, "loss": 0.0458, "step": 7813, "task_loss": 0.07380035519599915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999618434977966, "compression_loss": 0.0, "distillation_loss": 0.18687260150909424, "epoch": 7.42, "learning_rate": 4.956564627620862e-05, "loss": 0.1937, "step": 7814, "task_loss": 0.25557661056518555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999620305391403, "compression_loss": 0.0, "distillation_loss": 0.136896014213562, "epoch": 7.42, "learning_rate": 4.956366645792445e-05, "loss": 0.1269, "step": 7815, "task_loss": 0.03715690225362778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799962216968238, "compression_loss": 0.0, "distillation_loss": 0.011462707072496414, "epoch": 7.42, "learning_rate": 4.956168217753197e-05, "loss": 0.0107, "step": 7816, "task_loss": 0.004216820001602173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999624027860932, "compression_loss": 0.0, "distillation_loss": 0.030813174322247505, "epoch": 7.42, "learning_rate": 4.955969343539162e-05, "loss": 0.0384, "step": 7817, "task_loss": 0.10682034492492676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999625879937098, "compression_loss": 0.0, "distillation_loss": 0.0257425494492054, "epoch": 7.42, "learning_rate": 4.955770023186469e-05, "loss": 0.024, "step": 7818, "task_loss": 0.007959723472595215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999627725920913, "compression_loss": 0.0, "distillation_loss": 0.10705772042274475, "epoch": 7.43, "learning_rate": 4.9555702567313235e-05, "loss": 0.1132, "step": 7819, "task_loss": 0.1683865487575531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999629565822415, "compression_loss": 0.0, "distillation_loss": 0.07967795431613922, "epoch": 7.43, "learning_rate": 4.9553700442100146e-05, "loss": 0.0802, "step": 7820, "task_loss": 0.08508510887622833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799963139965164, "compression_loss": 0.0, "distillation_loss": 0.14191925525665283, "epoch": 7.43, "learning_rate": 4.955169385658912e-05, "loss": 0.1483, "step": 7821, "task_loss": 0.20544429123401642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999633227418624, "compression_loss": 0.0, "distillation_loss": 0.025436338037252426, "epoch": 7.43, "learning_rate": 4.954968281114467e-05, "loss": 0.0262, "step": 7822, "task_loss": 0.03303007036447525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999635049133407, "compression_loss": 0.0, "distillation_loss": 0.025042004883289337, "epoch": 7.43, "learning_rate": 4.9547667306132096e-05, "loss": 0.0274, "step": 7823, "task_loss": 0.048622217029333115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999636864806022, "compression_loss": 0.0, "distillation_loss": 0.04454713687300682, "epoch": 7.43, "learning_rate": 4.954564734191753e-05, "loss": 0.0541, "step": 7824, "task_loss": 0.13989417254924774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799963867444651, "compression_loss": 0.0, "distillation_loss": 0.037809595465660095, "epoch": 7.43, "learning_rate": 4.9543622918867926e-05, "loss": 0.0451, "step": 7825, "task_loss": 0.11072047799825668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999640478064903, "compression_loss": 0.0, "distillation_loss": 0.10665163397789001, "epoch": 7.43, "learning_rate": 4.9541594037351e-05, "loss": 0.1153, "step": 7826, "task_loss": 0.19337314367294312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999642275671242, "compression_loss": 0.0, "distillation_loss": 0.07981669902801514, "epoch": 7.43, "learning_rate": 4.953956069773534e-05, "loss": 0.0826, "step": 7827, "task_loss": 0.10715599358081818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999644067275561, "compression_loss": 0.0, "distillation_loss": 0.026072172448039055, "epoch": 7.43, "learning_rate": 4.953752290039028e-05, "loss": 0.0309, "step": 7828, "task_loss": 0.07401913404464722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999645852887898, "compression_loss": 0.0, "distillation_loss": 0.05162549391388893, "epoch": 7.43, "learning_rate": 4.953548064568602e-05, "loss": 0.0483, "step": 7829, "task_loss": 0.018601059913635254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799964763251829, "compression_loss": 0.0, "distillation_loss": 0.03277206048369408, "epoch": 7.44, "learning_rate": 4.953343393399354e-05, "loss": 0.0379, "step": 7830, "task_loss": 0.08411306887865067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999649406176773, "compression_loss": 0.0, "distillation_loss": 0.023461688309907913, "epoch": 7.44, "learning_rate": 4.953138276568462e-05, "loss": 0.0346, "step": 7831, "task_loss": 0.13488860428333282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999651173873386, "compression_loss": 0.0, "distillation_loss": 0.20481958985328674, "epoch": 7.44, "learning_rate": 4.952932714113188e-05, "loss": 0.2078, "step": 7832, "task_loss": 0.2342824935913086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999652935618162, "compression_loss": 0.0, "distillation_loss": 0.05175776779651642, "epoch": 7.44, "learning_rate": 4.9527267060708734e-05, "loss": 0.0608, "step": 7833, "task_loss": 0.14181137084960938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999654691421142, "compression_loss": 0.0, "distillation_loss": 0.1361636519432068, "epoch": 7.44, "learning_rate": 4.9525202524789397e-05, "loss": 0.1285, "step": 7834, "task_loss": 0.059967104345560074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799965644129236, "compression_loss": 0.0, "distillation_loss": 0.03751971200108528, "epoch": 7.44, "learning_rate": 4.952313353374891e-05, "loss": 0.0376, "step": 7835, "task_loss": 0.03848748654127121 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999658185241852, "compression_loss": 0.0, "distillation_loss": 0.03644530102610588, "epoch": 7.44, "learning_rate": 4.952106008796311e-05, "loss": 0.0375, "step": 7836, "task_loss": 0.04672703891992569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799965992327966, "compression_loss": 0.0, "distillation_loss": 0.03545859456062317, "epoch": 7.44, "learning_rate": 4.9518982187808653e-05, "loss": 0.036, "step": 7837, "task_loss": 0.040626343339681625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999661655415814, "compression_loss": 0.0, "distillation_loss": 0.04039856046438217, "epoch": 7.44, "learning_rate": 4.9516899833663e-05, "loss": 0.0369, "step": 7838, "task_loss": 0.005691641941666603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999663381660356, "compression_loss": 0.0, "distillation_loss": 0.019872594624757767, "epoch": 7.44, "learning_rate": 4.9514813025904413e-05, "loss": 0.0265, "step": 7839, "task_loss": 0.0859319418668747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799966510202332, "compression_loss": 0.0, "distillation_loss": 0.024417951703071594, "epoch": 7.45, "learning_rate": 4.951272176491197e-05, "loss": 0.0301, "step": 7840, "task_loss": 0.08107300847768784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999666816514744, "compression_loss": 0.0, "distillation_loss": 0.03704302757978439, "epoch": 7.45, "learning_rate": 4.951062605106557e-05, "loss": 0.0485, "step": 7841, "task_loss": 0.15177100896835327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999668525144665, "compression_loss": 0.0, "distillation_loss": 0.04666809365153313, "epoch": 7.45, "learning_rate": 4.950852588474591e-05, "loss": 0.0444, "step": 7842, "task_loss": 0.024120669811964035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999670227923119, "compression_loss": 0.0, "distillation_loss": 0.02970319800078869, "epoch": 7.45, "learning_rate": 4.9506421266334475e-05, "loss": 0.0321, "step": 7843, "task_loss": 0.053807858377695084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999671924860143, "compression_loss": 0.0, "distillation_loss": 0.027329690754413605, "epoch": 7.45, "learning_rate": 4.9504312196213596e-05, "loss": 0.0253, "step": 7844, "task_loss": 0.007107492536306381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999673615965774, "compression_loss": 0.0, "distillation_loss": 0.04351479932665825, "epoch": 7.45, "learning_rate": 4.95021986747664e-05, "loss": 0.0472, "step": 7845, "task_loss": 0.08041299134492874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799967530125005, "compression_loss": 0.0, "distillation_loss": 0.07271379977464676, "epoch": 7.45, "learning_rate": 4.9500080702376805e-05, "loss": 0.0735, "step": 7846, "task_loss": 0.08034893125295639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999676980723006, "compression_loss": 0.0, "distillation_loss": 0.07482333481311798, "epoch": 7.45, "learning_rate": 4.949795827942956e-05, "loss": 0.087, "step": 7847, "task_loss": 0.19708333909511566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799967865439468, "compression_loss": 0.0, "distillation_loss": 0.1008092612028122, "epoch": 7.45, "learning_rate": 4.9495831406310205e-05, "loss": 0.1136, "step": 7848, "task_loss": 0.22852961719036102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999680322275107, "compression_loss": 0.0, "distillation_loss": 0.05679786577820778, "epoch": 7.45, "learning_rate": 4.94937000834051e-05, "loss": 0.0553, "step": 7849, "task_loss": 0.042113713920116425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999681984374326, "compression_loss": 0.0, "distillation_loss": 0.15729619562625885, "epoch": 7.45, "learning_rate": 4.9491564311101426e-05, "loss": 0.1526, "step": 7850, "task_loss": 0.1107601523399353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999683640702373, "compression_loss": 0.0, "distillation_loss": 0.0956711545586586, "epoch": 7.46, "learning_rate": 4.9489424089787125e-05, "loss": 0.0906, "step": 7851, "task_loss": 0.04521763324737549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999685291269284, "compression_loss": 0.0, "distillation_loss": 0.1287081092596054, "epoch": 7.46, "learning_rate": 4.948727941985101e-05, "loss": 0.1281, "step": 7852, "task_loss": 0.1223960816860199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999686936085098, "compression_loss": 0.0, "distillation_loss": 0.02583775669336319, "epoch": 7.46, "learning_rate": 4.948513030168265e-05, "loss": 0.0244, "step": 7853, "task_loss": 0.011274663731455803 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999688575159849, "compression_loss": 0.0, "distillation_loss": 0.016532419249415398, "epoch": 7.46, "learning_rate": 4.948297673567245e-05, "loss": 0.0152, "step": 7854, "task_loss": 0.0036627184599637985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999690208503576, "compression_loss": 0.0, "distillation_loss": 0.11579355597496033, "epoch": 7.46, "learning_rate": 4.948081872221161e-05, "loss": 0.1192, "step": 7855, "task_loss": 0.1501166969537735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999691836126315, "compression_loss": 0.0, "distillation_loss": 0.038752175867557526, "epoch": 7.46, "learning_rate": 4.9478656261692155e-05, "loss": 0.0355, "step": 7856, "task_loss": 0.0065444111824035645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999693458038103, "compression_loss": 0.0, "distillation_loss": 0.027601994574069977, "epoch": 7.46, "learning_rate": 4.947648935450689e-05, "loss": 0.0254, "step": 7857, "task_loss": 0.00563957542181015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999695074248976, "compression_loss": 0.0, "distillation_loss": 0.10853047668933868, "epoch": 7.46, "learning_rate": 4.947431800104947e-05, "loss": 0.1053, "step": 7858, "task_loss": 0.07578997313976288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999696684768972, "compression_loss": 0.0, "distillation_loss": 0.05468723922967911, "epoch": 7.46, "learning_rate": 4.94721422017143e-05, "loss": 0.0603, "step": 7859, "task_loss": 0.11080426722764969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999698289608128, "compression_loss": 0.0, "distillation_loss": 0.05597386136651039, "epoch": 7.46, "learning_rate": 4.946996195689665e-05, "loss": 0.0563, "step": 7860, "task_loss": 0.0596843883395195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999699888776479, "compression_loss": 0.0, "distillation_loss": 0.0576602928340435, "epoch": 7.47, "learning_rate": 4.9467777266992555e-05, "loss": 0.0558, "step": 7861, "task_loss": 0.038872990757226944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999701482284064, "compression_loss": 0.0, "distillation_loss": 0.032028764486312866, "epoch": 7.47, "learning_rate": 4.946558813239888e-05, "loss": 0.0351, "step": 7862, "task_loss": 0.06291679292917252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999703070140919, "compression_loss": 0.0, "distillation_loss": 0.07386447489261627, "epoch": 7.47, "learning_rate": 4.94633945535133e-05, "loss": 0.071, "step": 7863, "task_loss": 0.04504578933119774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999704652357079, "compression_loss": 0.0, "distillation_loss": 0.026677310466766357, "epoch": 7.47, "learning_rate": 4.946119653073428e-05, "loss": 0.0249, "step": 7864, "task_loss": 0.009069759398698807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999706228942585, "compression_loss": 0.0, "distillation_loss": 0.025820819661021233, "epoch": 7.47, "learning_rate": 4.9458994064461103e-05, "loss": 0.0246, "step": 7865, "task_loss": 0.013258153572678566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799970779990747, "compression_loss": 0.0, "distillation_loss": 0.15189041197299957, "epoch": 7.47, "learning_rate": 4.945678715509386e-05, "loss": 0.1558, "step": 7866, "task_loss": 0.1908566802740097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999709365261771, "compression_loss": 0.0, "distillation_loss": 0.08925406634807587, "epoch": 7.47, "learning_rate": 4.9454575803033445e-05, "loss": 0.0941, "step": 7867, "task_loss": 0.13728086650371552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999710925015526, "compression_loss": 0.0, "distillation_loss": 0.12610575556755066, "epoch": 7.47, "learning_rate": 4.945236000868156e-05, "loss": 0.12, "step": 7868, "task_loss": 0.06535394489765167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999712479178773, "compression_loss": 0.0, "distillation_loss": 0.02531656250357628, "epoch": 7.47, "learning_rate": 4.9450139772440715e-05, "loss": 0.0236, "step": 7869, "task_loss": 0.007954435423016548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999714027761547, "compression_loss": 0.0, "distillation_loss": 0.02278076484799385, "epoch": 7.47, "learning_rate": 4.944791509471423e-05, "loss": 0.0209, "step": 7870, "task_loss": 0.0036298464983701706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999715570773887, "compression_loss": 0.0, "distillation_loss": 0.010984521359205246, "epoch": 7.47, "learning_rate": 4.944568597590622e-05, "loss": 0.0103, "step": 7871, "task_loss": 0.003945378586649895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999717108225826, "compression_loss": 0.0, "distillation_loss": 0.011236527003347874, "epoch": 7.48, "learning_rate": 4.944345241642162e-05, "loss": 0.0105, "step": 7872, "task_loss": 0.0039033014327287674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999718640127403, "compression_loss": 0.0, "distillation_loss": 0.11728902161121368, "epoch": 7.48, "learning_rate": 4.944121441666617e-05, "loss": 0.1264, "step": 7873, "task_loss": 0.2082228660583496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999720166488656, "compression_loss": 0.0, "distillation_loss": 0.05443963408470154, "epoch": 7.48, "learning_rate": 4.943897197704642e-05, "loss": 0.0619, "step": 7874, "task_loss": 0.12860585749149323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999721687319621, "compression_loss": 0.0, "distillation_loss": 0.10132479667663574, "epoch": 7.48, "learning_rate": 4.9436725097969696e-05, "loss": 0.1139, "step": 7875, "task_loss": 0.22712013125419617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999723202630333, "compression_loss": 0.0, "distillation_loss": 0.020510438829660416, "epoch": 7.48, "learning_rate": 4.943447377984418e-05, "loss": 0.0188, "step": 7876, "task_loss": 0.0038789715617895126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999724712430831, "compression_loss": 0.0, "distillation_loss": 0.17946818470954895, "epoch": 7.48, "learning_rate": 4.943221802307882e-05, "loss": 0.1833, "step": 7877, "task_loss": 0.21781717240810394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999726216731151, "compression_loss": 0.0, "distillation_loss": 0.050077952444553375, "epoch": 7.48, "learning_rate": 4.942995782808339e-05, "loss": 0.0547, "step": 7878, "task_loss": 0.09608137607574463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799972771554133, "compression_loss": 0.0, "distillation_loss": 0.03088299185037613, "epoch": 7.48, "learning_rate": 4.9427693195268466e-05, "loss": 0.0283, "step": 7879, "task_loss": 0.005498785525560379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999729208871406, "compression_loss": 0.0, "distillation_loss": 0.0309526976197958, "epoch": 7.48, "learning_rate": 4.942542412504543e-05, "loss": 0.0317, "step": 7880, "task_loss": 0.038051947951316833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999730696731413, "compression_loss": 0.0, "distillation_loss": 0.028046930208802223, "epoch": 7.48, "learning_rate": 4.942315061782646e-05, "loss": 0.0358, "step": 7881, "task_loss": 0.10542275756597519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799973217913139, "compression_loss": 0.0, "distillation_loss": 0.026341721415519714, "epoch": 7.49, "learning_rate": 4.942087267402457e-05, "loss": 0.0245, "step": 7882, "task_loss": 0.007860302925109863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999733656081373, "compression_loss": 0.0, "distillation_loss": 0.014681628905236721, "epoch": 7.49, "learning_rate": 4.941859029405353e-05, "loss": 0.0138, "step": 7883, "task_loss": 0.0060966480523347855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79997351275914, "compression_loss": 0.0, "distillation_loss": 0.17591474950313568, "epoch": 7.49, "learning_rate": 4.9416303478327974e-05, "loss": 0.1843, "step": 7884, "task_loss": 0.25975045561790466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999736593671505, "compression_loss": 0.0, "distillation_loss": 0.04054497182369232, "epoch": 7.49, "learning_rate": 4.9414012227263295e-05, "loss": 0.0438, "step": 7885, "task_loss": 0.07342273741960526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999738054331729, "compression_loss": 0.0, "distillation_loss": 0.2347552627325058, "epoch": 7.49, "learning_rate": 4.941171654127572e-05, "loss": 0.2415, "step": 7886, "task_loss": 0.301849901676178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999739509582104, "compression_loss": 0.0, "distillation_loss": 0.04243939742445946, "epoch": 7.49, "learning_rate": 4.9409416420782264e-05, "loss": 0.0463, "step": 7887, "task_loss": 0.0810224711894989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999740959432671, "compression_loss": 0.0, "distillation_loss": 0.021390702575445175, "epoch": 7.49, "learning_rate": 4.940711186620076e-05, "loss": 0.0198, "step": 7888, "task_loss": 0.0057023558765649796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999742403893465, "compression_loss": 0.0, "distillation_loss": 0.08921066671609879, "epoch": 7.49, "learning_rate": 4.9404802877949843e-05, "loss": 0.1026, "step": 7889, "task_loss": 0.22274163365364075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999743842974523, "compression_loss": 0.0, "distillation_loss": 0.03275410085916519, "epoch": 7.49, "learning_rate": 4.940248945644894e-05, "loss": 0.0315, "step": 7890, "task_loss": 0.02053932100534439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999745276685881, "compression_loss": 0.0, "distillation_loss": 0.11449338495731354, "epoch": 7.49, "learning_rate": 4.9400171602118306e-05, "loss": 0.1164, "step": 7891, "task_loss": 0.13319021463394165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999746705037577, "compression_loss": 0.0, "distillation_loss": 0.03880294784903526, "epoch": 7.49, "learning_rate": 4.939784931537899e-05, "loss": 0.0356, "step": 7892, "task_loss": 0.00670219212770462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999748128039648, "compression_loss": 0.0, "distillation_loss": 0.09690918028354645, "epoch": 7.5, "learning_rate": 4.9395522596652846e-05, "loss": 0.0992, "step": 7893, "task_loss": 0.12014244496822357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999749545702131, "compression_loss": 0.0, "distillation_loss": 0.025341130793094635, "epoch": 7.5, "learning_rate": 4.939319144636253e-05, "loss": 0.0233, "step": 7894, "task_loss": 0.005261138081550598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799975095803506, "compression_loss": 0.0, "distillation_loss": 0.0120600126683712, "epoch": 7.5, "learning_rate": 4.9390855864931504e-05, "loss": 0.0113, "step": 7895, "task_loss": 0.004162052646279335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999752365048475, "compression_loss": 0.0, "distillation_loss": 0.1584700495004654, "epoch": 7.5, "learning_rate": 4.938851585278405e-05, "loss": 0.1588, "step": 7896, "task_loss": 0.16162604093551636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999753766752412, "compression_loss": 0.0, "distillation_loss": 0.0646199956536293, "epoch": 7.5, "learning_rate": 4.938617141034523e-05, "loss": 0.0662, "step": 7897, "task_loss": 0.08005733042955399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999755163156906, "compression_loss": 0.0, "distillation_loss": 0.08716416358947754, "epoch": 7.5, "learning_rate": 4.938382253804094e-05, "loss": 0.0858, "step": 7898, "task_loss": 0.07349246740341187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999756554271997, "compression_loss": 0.0, "distillation_loss": 0.022657334804534912, "epoch": 7.5, "learning_rate": 4.938146923629784e-05, "loss": 0.0267, "step": 7899, "task_loss": 0.06275828927755356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799975794010772, "compression_loss": 0.0, "distillation_loss": 0.01820671185851097, "epoch": 7.5, "learning_rate": 4.937911150554343e-05, "loss": 0.0228, "step": 7900, "task_loss": 0.06378181278705597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999759320674112, "compression_loss": 0.0, "distillation_loss": 0.11242584884166718, "epoch": 7.5, "learning_rate": 4.9376749346206006e-05, "loss": 0.1087, "step": 7901, "task_loss": 0.07475525140762329 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799976069598121, "compression_loss": 0.0, "distillation_loss": 0.048631347715854645, "epoch": 7.5, "learning_rate": 4.937438275871467e-05, "loss": 0.058, "step": 7902, "task_loss": 0.1423451006412506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799976206603905, "compression_loss": 0.0, "distillation_loss": 0.035419534891843796, "epoch": 7.51, "learning_rate": 4.9372011743499315e-05, "loss": 0.0361, "step": 7903, "task_loss": 0.042088061571121216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999763430857669, "compression_loss": 0.0, "distillation_loss": 0.05880480259656906, "epoch": 7.51, "learning_rate": 4.9369636300990645e-05, "loss": 0.0785, "step": 7904, "task_loss": 0.2559299170970917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999764790447106, "compression_loss": 0.0, "distillation_loss": 0.12047819793224335, "epoch": 7.51, "learning_rate": 4.936725643162018e-05, "loss": 0.1142, "step": 7905, "task_loss": 0.05795508623123169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999766144817395, "compression_loss": 0.0, "distillation_loss": 0.11895313113927841, "epoch": 7.51, "learning_rate": 4.936487213582023e-05, "loss": 0.1289, "step": 7906, "task_loss": 0.2186514288187027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999767493978573, "compression_loss": 0.0, "distillation_loss": 0.08085181564092636, "epoch": 7.51, "learning_rate": 4.9362483414023905e-05, "loss": 0.0756, "step": 7907, "task_loss": 0.028206422924995422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799976883794068, "compression_loss": 0.0, "distillation_loss": 0.03390750288963318, "epoch": 7.51, "learning_rate": 4.936009026666515e-05, "loss": 0.0341, "step": 7908, "task_loss": 0.035543277859687805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999770176713749, "compression_loss": 0.0, "distillation_loss": 0.09985256940126419, "epoch": 7.51, "learning_rate": 4.935769269417867e-05, "loss": 0.1132, "step": 7909, "task_loss": 0.23357577621936798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999771510307819, "compression_loss": 0.0, "distillation_loss": 0.14149194955825806, "epoch": 7.51, "learning_rate": 4.935529069700001e-05, "loss": 0.1658, "step": 7910, "task_loss": 0.3845874071121216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999772838732926, "compression_loss": 0.0, "distillation_loss": 0.12010196596384048, "epoch": 7.51, "learning_rate": 4.935288427556549e-05, "loss": 0.1175, "step": 7911, "task_loss": 0.0936579704284668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999774161999107, "compression_loss": 0.0, "distillation_loss": 0.15679499506950378, "epoch": 7.51, "learning_rate": 4.935047343031227e-05, "loss": 0.1524, "step": 7912, "task_loss": 0.1123620867729187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999775480116399, "compression_loss": 0.0, "distillation_loss": 0.04103892296552658, "epoch": 7.51, "learning_rate": 4.934805816167827e-05, "loss": 0.0391, "step": 7913, "task_loss": 0.021613851189613342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999776793094838, "compression_loss": 0.0, "distillation_loss": 0.0210304856300354, "epoch": 7.52, "learning_rate": 4.934563847010224e-05, "loss": 0.0286, "step": 7914, "task_loss": 0.09712889790534973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999778100944462, "compression_loss": 0.0, "distillation_loss": 0.049790140241384506, "epoch": 7.52, "learning_rate": 4.934321435602374e-05, "loss": 0.0514, "step": 7915, "task_loss": 0.06559078395366669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999779403675308, "compression_loss": 0.0, "distillation_loss": 0.025263594463467598, "epoch": 7.52, "learning_rate": 4.934078581988311e-05, "loss": 0.0281, "step": 7916, "task_loss": 0.0537312813103199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999780701297412, "compression_loss": 0.0, "distillation_loss": 0.03194242715835571, "epoch": 7.52, "learning_rate": 4.933835286212151e-05, "loss": 0.0295, "step": 7917, "task_loss": 0.007093427702784538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799978199382081, "compression_loss": 0.0, "distillation_loss": 0.056847527623176575, "epoch": 7.52, "learning_rate": 4.9335915483180896e-05, "loss": 0.0558, "step": 7918, "task_loss": 0.046506118029356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799978328125554, "compression_loss": 0.0, "distillation_loss": 0.03372214734554291, "epoch": 7.52, "learning_rate": 4.9333473683504025e-05, "loss": 0.0443, "step": 7919, "task_loss": 0.1395803838968277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999784563611639, "compression_loss": 0.0, "distillation_loss": 0.08080513775348663, "epoch": 7.52, "learning_rate": 4.9331027463534484e-05, "loss": 0.0788, "step": 7920, "task_loss": 0.060925908386707306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999785840899144, "compression_loss": 0.0, "distillation_loss": 0.018568016588687897, "epoch": 7.52, "learning_rate": 4.932857682371661e-05, "loss": 0.0259, "step": 7921, "task_loss": 0.09170715510845184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999787113128091, "compression_loss": 0.0, "distillation_loss": 0.03294629603624344, "epoch": 7.52, "learning_rate": 4.9326121764495596e-05, "loss": 0.0304, "step": 7922, "task_loss": 0.007955122739076614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999788380308517, "compression_loss": 0.0, "distillation_loss": 0.07836222648620605, "epoch": 7.52, "learning_rate": 4.932366228631741e-05, "loss": 0.0852, "step": 7923, "task_loss": 0.14636337757110596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999789642450458, "compression_loss": 0.0, "distillation_loss": 0.08306904882192612, "epoch": 7.53, "learning_rate": 4.932119838962882e-05, "loss": 0.0848, "step": 7924, "task_loss": 0.10019460320472717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999790899563952, "compression_loss": 0.0, "distillation_loss": 0.02642223611474037, "epoch": 7.53, "learning_rate": 4.931873007487741e-05, "loss": 0.0414, "step": 7925, "task_loss": 0.17620962858200073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999792151659036, "compression_loss": 0.0, "distillation_loss": 0.030220147222280502, "epoch": 7.53, "learning_rate": 4.9316257342511565e-05, "loss": 0.0286, "step": 7926, "task_loss": 0.01419852301478386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999793398745746, "compression_loss": 0.0, "distillation_loss": 0.018758175894618034, "epoch": 7.53, "learning_rate": 4.9313780192980466e-05, "loss": 0.0236, "step": 7927, "task_loss": 0.06724384427070618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799979464083412, "compression_loss": 0.0, "distillation_loss": 0.14385569095611572, "epoch": 7.53, "learning_rate": 4.9311298626734095e-05, "loss": 0.1551, "step": 7928, "task_loss": 0.25620976090431213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999795877934193, "compression_loss": 0.0, "distillation_loss": 0.13976441323757172, "epoch": 7.53, "learning_rate": 4.9308812644223245e-05, "loss": 0.1381, "step": 7929, "task_loss": 0.12334275990724564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999797110056004, "compression_loss": 0.0, "distillation_loss": 0.04338621348142624, "epoch": 7.53, "learning_rate": 4.9306322245899505e-05, "loss": 0.0628, "step": 7930, "task_loss": 0.23742617666721344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999798337209587, "compression_loss": 0.0, "distillation_loss": 0.010634875856339931, "epoch": 7.53, "learning_rate": 4.930382743221528e-05, "loss": 0.0198, "step": 7931, "task_loss": 0.1026746854186058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999799559404982, "compression_loss": 0.0, "distillation_loss": 0.028556659817695618, "epoch": 7.53, "learning_rate": 4.930132820362374e-05, "loss": 0.0416, "step": 7932, "task_loss": 0.15901359915733337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999800776652223, "compression_loss": 0.0, "distillation_loss": 0.06336972862482071, "epoch": 7.53, "learning_rate": 4.9298824560578895e-05, "loss": 0.0625, "step": 7933, "task_loss": 0.054341040551662445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999801988961349, "compression_loss": 0.0, "distillation_loss": 0.09229055047035217, "epoch": 7.53, "learning_rate": 4.929631650353555e-05, "loss": 0.0899, "step": 7934, "task_loss": 0.06858476251363754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999803196342395, "compression_loss": 0.0, "distillation_loss": 0.10068748891353607, "epoch": 7.54, "learning_rate": 4.92938040329493e-05, "loss": 0.0991, "step": 7935, "task_loss": 0.0846622884273529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79998043988054, "compression_loss": 0.0, "distillation_loss": 0.033520594239234924, "epoch": 7.54, "learning_rate": 4.9291287149276544e-05, "loss": 0.0323, "step": 7936, "task_loss": 0.02134551852941513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999805596360399, "compression_loss": 0.0, "distillation_loss": 0.04189582169055939, "epoch": 7.54, "learning_rate": 4.928876585297448e-05, "loss": 0.0512, "step": 7937, "task_loss": 0.13514596223831177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999806789017428, "compression_loss": 0.0, "distillation_loss": 0.04260486364364624, "epoch": 7.54, "learning_rate": 4.9286240144501136e-05, "loss": 0.0483, "step": 7938, "task_loss": 0.09960085153579712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999807976786527, "compression_loss": 0.0, "distillation_loss": 0.08986024558544159, "epoch": 7.54, "learning_rate": 4.928371002431531e-05, "loss": 0.0864, "step": 7939, "task_loss": 0.05546959117054939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999809159677731, "compression_loss": 0.0, "distillation_loss": 0.05223695933818817, "epoch": 7.54, "learning_rate": 4.92811754928766e-05, "loss": 0.0485, "step": 7940, "task_loss": 0.014858538284897804 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999810337701077, "compression_loss": 0.0, "distillation_loss": 0.022646090015769005, "epoch": 7.54, "learning_rate": 4.927863655064542e-05, "loss": 0.0294, "step": 7941, "task_loss": 0.0898606926202774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79998115108666, "compression_loss": 0.0, "distillation_loss": 0.12153077125549316, "epoch": 7.54, "learning_rate": 4.9276093198082986e-05, "loss": 0.1213, "step": 7942, "task_loss": 0.11907624453306198 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799981267918434, "compression_loss": 0.0, "distillation_loss": 0.10180039703845978, "epoch": 7.54, "learning_rate": 4.92735454356513e-05, "loss": 0.1072, "step": 7943, "task_loss": 0.15552875399589539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999813842664332, "compression_loss": 0.0, "distillation_loss": 0.04333251342177391, "epoch": 7.54, "learning_rate": 4.927099326381319e-05, "loss": 0.0482, "step": 7944, "task_loss": 0.09174293279647827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999815001316612, "compression_loss": 0.0, "distillation_loss": 0.06807390600442886, "epoch": 7.55, "learning_rate": 4.926843668303227e-05, "loss": 0.0658, "step": 7945, "task_loss": 0.045711807906627655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799981615515122, "compression_loss": 0.0, "distillation_loss": 0.0737372487783432, "epoch": 7.55, "learning_rate": 4.926587569377293e-05, "loss": 0.0728, "step": 7946, "task_loss": 0.06481769680976868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799981730417819, "compression_loss": 0.0, "distillation_loss": 0.028609251603484154, "epoch": 7.55, "learning_rate": 4.926331029650042e-05, "loss": 0.0288, "step": 7947, "task_loss": 0.03084813803434372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999818448407559, "compression_loss": 0.0, "distillation_loss": 0.10599102079868317, "epoch": 7.55, "learning_rate": 4.926074049168074e-05, "loss": 0.1118, "step": 7948, "task_loss": 0.16414104402065277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999819587849365, "compression_loss": 0.0, "distillation_loss": 0.04272402077913284, "epoch": 7.55, "learning_rate": 4.9258166279780704e-05, "loss": 0.0482, "step": 7949, "task_loss": 0.0972161665558815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999820722513644, "compression_loss": 0.0, "distillation_loss": 0.09052185714244843, "epoch": 7.55, "learning_rate": 4.925558766126794e-05, "loss": 0.0844, "step": 7950, "task_loss": 0.029212266206741333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999821852410433, "compression_loss": 0.0, "distillation_loss": 0.031549785286188126, "epoch": 7.55, "learning_rate": 4.9253004636610856e-05, "loss": 0.0294, "step": 7951, "task_loss": 0.010255459696054459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999822977549769, "compression_loss": 0.0, "distillation_loss": 0.02592216432094574, "epoch": 7.55, "learning_rate": 4.925041720627868e-05, "loss": 0.0242, "step": 7952, "task_loss": 0.008722037076950073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999824097941688, "compression_loss": 0.0, "distillation_loss": 0.051010869443416595, "epoch": 7.55, "learning_rate": 4.9247825370741416e-05, "loss": 0.0672, "step": 7953, "task_loss": 0.2124231904745102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999825213596229, "compression_loss": 0.0, "distillation_loss": 0.04993243142962456, "epoch": 7.55, "learning_rate": 4.924522913046991e-05, "loss": 0.0522, "step": 7954, "task_loss": 0.072673000395298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999826324523426, "compression_loss": 0.0, "distillation_loss": 0.024953220039606094, "epoch": 7.55, "learning_rate": 4.924262848593576e-05, "loss": 0.0243, "step": 7955, "task_loss": 0.018649937584996223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999827430733317, "compression_loss": 0.0, "distillation_loss": 0.07212147861719131, "epoch": 7.56, "learning_rate": 4.924002343761139e-05, "loss": 0.0838, "step": 7956, "task_loss": 0.18851712346076965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799982853223594, "compression_loss": 0.0, "distillation_loss": 0.012818554416298866, "epoch": 7.56, "learning_rate": 4.923741398597002e-05, "loss": 0.0167, "step": 7957, "task_loss": 0.051808781921863556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999829629041331, "compression_loss": 0.0, "distillation_loss": 0.12450156360864639, "epoch": 7.56, "learning_rate": 4.9234800131485675e-05, "loss": 0.1184, "step": 7958, "task_loss": 0.06334085762500763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999830721159525, "compression_loss": 0.0, "distillation_loss": 0.0640573650598526, "epoch": 7.56, "learning_rate": 4.9232181874633164e-05, "loss": 0.0677, "step": 7959, "task_loss": 0.10021346807479858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999831808600562, "compression_loss": 0.0, "distillation_loss": 0.038610100746154785, "epoch": 7.56, "learning_rate": 4.922955921588812e-05, "loss": 0.0405, "step": 7960, "task_loss": 0.05722128972411156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999832891374477, "compression_loss": 0.0, "distillation_loss": 0.07282688468694687, "epoch": 7.56, "learning_rate": 4.922693215572695e-05, "loss": 0.0788, "step": 7961, "task_loss": 0.13281500339508057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999833969491307, "compression_loss": 0.0, "distillation_loss": 0.04970156401395798, "epoch": 7.56, "learning_rate": 4.922430069462688e-05, "loss": 0.0521, "step": 7962, "task_loss": 0.07329516112804413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999835042961089, "compression_loss": 0.0, "distillation_loss": 0.02577708661556244, "epoch": 7.56, "learning_rate": 4.9221664833065914e-05, "loss": 0.025, "step": 7963, "task_loss": 0.017895691096782684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999836111793859, "compression_loss": 0.0, "distillation_loss": 0.027007538825273514, "epoch": 7.56, "learning_rate": 4.921902457152289e-05, "loss": 0.0247, "step": 7964, "task_loss": 0.003965174779295921 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999837175999654, "compression_loss": 0.0, "distillation_loss": 0.019705362617969513, "epoch": 7.56, "learning_rate": 4.9216379910477403e-05, "loss": 0.0182, "step": 7965, "task_loss": 0.004658494144678116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999838235588513, "compression_loss": 0.0, "distillation_loss": 0.033040136098861694, "epoch": 7.57, "learning_rate": 4.921373085040988e-05, "loss": 0.0342, "step": 7966, "task_loss": 0.04470521956682205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999839290570471, "compression_loss": 0.0, "distillation_loss": 0.023144233971834183, "epoch": 7.57, "learning_rate": 4.921107739180153e-05, "loss": 0.0267, "step": 7967, "task_loss": 0.05902532488107681 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999840340955565, "compression_loss": 0.0, "distillation_loss": 0.017418548464775085, "epoch": 7.57, "learning_rate": 4.9208419535134376e-05, "loss": 0.0189, "step": 7968, "task_loss": 0.03223051875829697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999841386753831, "compression_loss": 0.0, "distillation_loss": 0.2720472514629364, "epoch": 7.57, "learning_rate": 4.920575728089122e-05, "loss": 0.2749, "step": 7969, "task_loss": 0.3010197579860687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999842427975308, "compression_loss": 0.0, "distillation_loss": 0.052931733429431915, "epoch": 7.57, "learning_rate": 4.920309062955568e-05, "loss": 0.0564, "step": 7970, "task_loss": 0.0876016914844513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799984346463003, "compression_loss": 0.0, "distillation_loss": 0.03811418265104294, "epoch": 7.57, "learning_rate": 4.920041958161217e-05, "loss": 0.0421, "step": 7971, "task_loss": 0.0776088535785675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999844496728036, "compression_loss": 0.0, "distillation_loss": 0.033870723098516464, "epoch": 7.57, "learning_rate": 4.9197744137545884e-05, "loss": 0.0475, "step": 7972, "task_loss": 0.1701067090034485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999845524279362, "compression_loss": 0.0, "distillation_loss": 0.02466290071606636, "epoch": 7.57, "learning_rate": 4.919506429784284e-05, "loss": 0.0317, "step": 7973, "task_loss": 0.09482477605342865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999846547294045, "compression_loss": 0.0, "distillation_loss": 0.053808264434337616, "epoch": 7.57, "learning_rate": 4.919238006298984e-05, "loss": 0.0581, "step": 7974, "task_loss": 0.0962759405374527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999847565782121, "compression_loss": 0.0, "distillation_loss": 0.051727067679166794, "epoch": 7.57, "learning_rate": 4.9189691433474494e-05, "loss": 0.064, "step": 7975, "task_loss": 0.17400768399238586 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999848579753629, "compression_loss": 0.0, "distillation_loss": 0.029435984790325165, "epoch": 7.57, "learning_rate": 4.91869984097852e-05, "loss": 0.0384, "step": 7976, "task_loss": 0.11923931539058685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999849589218603, "compression_loss": 0.0, "distillation_loss": 0.030128782615065575, "epoch": 7.58, "learning_rate": 4.918430099241116e-05, "loss": 0.036, "step": 7977, "task_loss": 0.08850212395191193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999850594187082, "compression_loss": 0.0, "distillation_loss": 0.03268412873148918, "epoch": 7.58, "learning_rate": 4.918159918184236e-05, "loss": 0.0301, "step": 7978, "task_loss": 0.006716296076774597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999851594669102, "compression_loss": 0.0, "distillation_loss": 0.03931747376918793, "epoch": 7.58, "learning_rate": 4.9178892978569625e-05, "loss": 0.0459, "step": 7979, "task_loss": 0.10497306287288666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79998525906747, "compression_loss": 0.0, "distillation_loss": 0.015478894114494324, "epoch": 7.58, "learning_rate": 4.9176182383084524e-05, "loss": 0.0297, "step": 7980, "task_loss": 0.15772272646427155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999853582213913, "compression_loss": 0.0, "distillation_loss": 0.09228493273258209, "epoch": 7.58, "learning_rate": 4.917346739587946e-05, "loss": 0.0927, "step": 7981, "task_loss": 0.09685853123664856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999854569296777, "compression_loss": 0.0, "distillation_loss": 0.01883935183286667, "epoch": 7.58, "learning_rate": 4.917074801744763e-05, "loss": 0.0178, "step": 7982, "task_loss": 0.00816972553730011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999855551933329, "compression_loss": 0.0, "distillation_loss": 0.021920818835496902, "epoch": 7.58, "learning_rate": 4.916802424828301e-05, "loss": 0.0203, "step": 7983, "task_loss": 0.0058016423135995865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999856530133607, "compression_loss": 0.0, "distillation_loss": 0.02121850848197937, "epoch": 7.58, "learning_rate": 4.9165296088880384e-05, "loss": 0.0198, "step": 7984, "task_loss": 0.0068349651992321014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999857503907646, "compression_loss": 0.0, "distillation_loss": 0.11084365099668503, "epoch": 7.58, "learning_rate": 4.916256353973535e-05, "loss": 0.1116, "step": 7985, "task_loss": 0.11807304620742798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999858473265484, "compression_loss": 0.0, "distillation_loss": 0.031100111082196236, "epoch": 7.58, "learning_rate": 4.9159826601344286e-05, "loss": 0.0335, "step": 7986, "task_loss": 0.05468904972076416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999859438217158, "compression_loss": 0.0, "distillation_loss": 0.058571815490722656, "epoch": 7.58, "learning_rate": 4.915708527420435e-05, "loss": 0.065, "step": 7987, "task_loss": 0.1225350946187973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999860398772705, "compression_loss": 0.0, "distillation_loss": 0.033145397901535034, "epoch": 7.59, "learning_rate": 4.9154339558813546e-05, "loss": 0.0307, "step": 7988, "task_loss": 0.008651839569211006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999861354942159, "compression_loss": 0.0, "distillation_loss": 0.11876051872968674, "epoch": 7.59, "learning_rate": 4.915158945567062e-05, "loss": 0.1123, "step": 7989, "task_loss": 0.05439896881580353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999862306735561, "compression_loss": 0.0, "distillation_loss": 0.1755588948726654, "epoch": 7.59, "learning_rate": 4.914883496527516e-05, "loss": 0.1739, "step": 7990, "task_loss": 0.15885521471500397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999863254162946, "compression_loss": 0.0, "distillation_loss": 0.06931862980127335, "epoch": 7.59, "learning_rate": 4.914607608812753e-05, "loss": 0.0661, "step": 7991, "task_loss": 0.03663593530654907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799986419723435, "compression_loss": 0.0, "distillation_loss": 0.0520208366215229, "epoch": 7.59, "learning_rate": 4.9143312824728896e-05, "loss": 0.0687, "step": 7992, "task_loss": 0.2192659229040146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799986513595981, "compression_loss": 0.0, "distillation_loss": 0.045383553951978683, "epoch": 7.59, "learning_rate": 4.91405451755812e-05, "loss": 0.0435, "step": 7993, "task_loss": 0.026806168258190155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999866070349365, "compression_loss": 0.0, "distillation_loss": 0.013009166345000267, "epoch": 7.59, "learning_rate": 4.913777314118721e-05, "loss": 0.0176, "step": 7994, "task_loss": 0.05919176712632179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999867000413049, "compression_loss": 0.0, "distillation_loss": 0.0388064906001091, "epoch": 7.59, "learning_rate": 4.9134996722050483e-05, "loss": 0.0513, "step": 7995, "task_loss": 0.16394969820976257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999867926160901, "compression_loss": 0.0, "distillation_loss": 0.018643934279680252, "epoch": 7.59, "learning_rate": 4.913221591867537e-05, "loss": 0.0346, "step": 7996, "task_loss": 0.17829307913780212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999868847602956, "compression_loss": 0.0, "distillation_loss": 0.013243372552096844, "epoch": 7.59, "learning_rate": 4.912943073156701e-05, "loss": 0.0122, "step": 7997, "task_loss": 0.0027358736842870712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999869764749251, "compression_loss": 0.0, "distillation_loss": 0.036573659628629684, "epoch": 7.6, "learning_rate": 4.912664116123134e-05, "loss": 0.0333, "step": 7998, "task_loss": 0.003510754555463791 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999870677609825, "compression_loss": 0.0, "distillation_loss": 0.03176216036081314, "epoch": 7.6, "learning_rate": 4.9123847208175126e-05, "loss": 0.0383, "step": 7999, "task_loss": 0.09740820527076721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999871586194712, "compression_loss": 0.0, "distillation_loss": 0.013369059190154076, "epoch": 7.6, "learning_rate": 4.912104887290587e-05, "loss": 0.0123, "step": 8000, "task_loss": 0.0031455066055059433 }, { "epoch": 7.6, "eval_accuracy": 0.8784403669724771, "eval_loss": 0.49849244952201843, "eval_runtime": 18.0456, "eval_samples_per_second": 48.322, "eval_steps_per_second": 6.04, "step": 8000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999872490513951, "compression_loss": 0.0, "distillation_loss": 0.10992217063903809, "epoch": 7.6, "learning_rate": 4.911824615593193e-05, "loss": 0.1131, "step": 8001, "task_loss": 0.1418878138065338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999873390577578, "compression_loss": 0.0, "distillation_loss": 0.042782656848430634, "epoch": 7.6, "learning_rate": 4.9115439057762416e-05, "loss": 0.0481, "step": 8002, "task_loss": 0.09572674334049225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999874286395628, "compression_loss": 0.0, "distillation_loss": 0.03839121758937836, "epoch": 7.6, "learning_rate": 4.911262757890726e-05, "loss": 0.0471, "step": 8003, "task_loss": 0.12578138709068298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999875177978142, "compression_loss": 0.0, "distillation_loss": 0.01663174107670784, "epoch": 7.6, "learning_rate": 4.9109811719877166e-05, "loss": 0.024, "step": 8004, "task_loss": 0.09052921831607819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999876065335153, "compression_loss": 0.0, "distillation_loss": 0.09784665703773499, "epoch": 7.6, "learning_rate": 4.910699148118367e-05, "loss": 0.0997, "step": 8005, "task_loss": 0.1160675436258316 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79998769484767, "compression_loss": 0.0, "distillation_loss": 0.027502398937940598, "epoch": 7.6, "learning_rate": 4.910416686333906e-05, "loss": 0.0349, "step": 8006, "task_loss": 0.10173024237155914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999877827412818, "compression_loss": 0.0, "distillation_loss": 0.03942710533738136, "epoch": 7.6, "learning_rate": 4.910133786685646e-05, "loss": 0.0418, "step": 8007, "task_loss": 0.06354475021362305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999878702153546, "compression_loss": 0.0, "distillation_loss": 0.016465935856103897, "epoch": 7.6, "learning_rate": 4.9098504492249764e-05, "loss": 0.0239, "step": 8008, "task_loss": 0.09078645706176758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999879572708918, "compression_loss": 0.0, "distillation_loss": 0.020133644342422485, "epoch": 7.61, "learning_rate": 4.9095666740033664e-05, "loss": 0.0187, "step": 8009, "task_loss": 0.006201488897204399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999880439088973, "compression_loss": 0.0, "distillation_loss": 0.04203331097960472, "epoch": 7.61, "learning_rate": 4.9092824610723655e-05, "loss": 0.0508, "step": 8010, "task_loss": 0.12994877994060516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999881301303748, "compression_loss": 0.0, "distillation_loss": 0.04908212646842003, "epoch": 7.61, "learning_rate": 4.908997810483602e-05, "loss": 0.0532, "step": 8011, "task_loss": 0.09064097702503204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799988215936328, "compression_loss": 0.0, "distillation_loss": 0.0626966580748558, "epoch": 7.61, "learning_rate": 4.908712722288785e-05, "loss": 0.088, "step": 8012, "task_loss": 0.316189706325531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999883013277603, "compression_loss": 0.0, "distillation_loss": 0.058064427226781845, "epoch": 7.61, "learning_rate": 4.9084271965397014e-05, "loss": 0.0626, "step": 8013, "task_loss": 0.1036590188741684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999883863056757, "compression_loss": 0.0, "distillation_loss": 0.04076586291193962, "epoch": 7.61, "learning_rate": 4.908141233288218e-05, "loss": 0.0374, "step": 8014, "task_loss": 0.00735941156744957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999884708710777, "compression_loss": 0.0, "distillation_loss": 0.08777140825986862, "epoch": 7.61, "learning_rate": 4.907854832586282e-05, "loss": 0.0976, "step": 8015, "task_loss": 0.18588636815547943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999885550249701, "compression_loss": 0.0, "distillation_loss": 0.038717057555913925, "epoch": 7.61, "learning_rate": 4.907567994485919e-05, "loss": 0.041, "step": 8016, "task_loss": 0.06178871542215347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999886387683565, "compression_loss": 0.0, "distillation_loss": 0.14275038242340088, "epoch": 7.61, "learning_rate": 4.9072807190392354e-05, "loss": 0.1415, "step": 8017, "task_loss": 0.13036131858825684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999887221022406, "compression_loss": 0.0, "distillation_loss": 0.024897240102291107, "epoch": 7.61, "learning_rate": 4.906993006298416e-05, "loss": 0.0451, "step": 8018, "task_loss": 0.22650375962257385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999888050276261, "compression_loss": 0.0, "distillation_loss": 0.14283829927444458, "epoch": 7.62, "learning_rate": 4.9067048563157235e-05, "loss": 0.1404, "step": 8019, "task_loss": 0.1185140609741211 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999888875455167, "compression_loss": 0.0, "distillation_loss": 0.08865389972925186, "epoch": 7.62, "learning_rate": 4.906416269143505e-05, "loss": 0.0981, "step": 8020, "task_loss": 0.18301840126514435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799988969656916, "compression_loss": 0.0, "distillation_loss": 0.08406002819538116, "epoch": 7.62, "learning_rate": 4.90612724483418e-05, "loss": 0.0969, "step": 8021, "task_loss": 0.21275919675827026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999890513628277, "compression_loss": 0.0, "distillation_loss": 0.08485761284828186, "epoch": 7.62, "learning_rate": 4.905837783440253e-05, "loss": 0.0828, "step": 8022, "task_loss": 0.0644686371088028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999891326642555, "compression_loss": 0.0, "distillation_loss": 0.040136683732271194, "epoch": 7.62, "learning_rate": 4.905547885014307e-05, "loss": 0.0373, "step": 8023, "task_loss": 0.01128750666975975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999892135622032, "compression_loss": 0.0, "distillation_loss": 0.014311270788311958, "epoch": 7.62, "learning_rate": 4.9052575496090016e-05, "loss": 0.0133, "step": 8024, "task_loss": 0.004563674330711365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999892940576744, "compression_loss": 0.0, "distillation_loss": 0.03336448222398758, "epoch": 7.62, "learning_rate": 4.904966777277079e-05, "loss": 0.0309, "step": 8025, "task_loss": 0.008388657122850418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999893741516727, "compression_loss": 0.0, "distillation_loss": 0.030747603625059128, "epoch": 7.62, "learning_rate": 4.9046755680713586e-05, "loss": 0.033, "step": 8026, "task_loss": 0.05296764522790909 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999894538452018, "compression_loss": 0.0, "distillation_loss": 0.021251916885375977, "epoch": 7.62, "learning_rate": 4.90438392204474e-05, "loss": 0.0355, "step": 8027, "task_loss": 0.16398029029369354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999895331392655, "compression_loss": 0.0, "distillation_loss": 0.21242570877075195, "epoch": 7.62, "learning_rate": 4.9040918392502026e-05, "loss": 0.2064, "step": 8028, "task_loss": 0.15173274278640747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999896120348674, "compression_loss": 0.0, "distillation_loss": 0.020135043188929558, "epoch": 7.62, "learning_rate": 4.903799319740804e-05, "loss": 0.0209, "step": 8029, "task_loss": 0.02775605395436287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999896905330112, "compression_loss": 0.0, "distillation_loss": 0.033395953476428986, "epoch": 7.63, "learning_rate": 4.903506363569683e-05, "loss": 0.0337, "step": 8030, "task_loss": 0.036266930401325226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999897686347005, "compression_loss": 0.0, "distillation_loss": 0.02742244489490986, "epoch": 7.63, "learning_rate": 4.9032129707900556e-05, "loss": 0.0256, "step": 8031, "task_loss": 0.008756054565310478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999898463409392, "compression_loss": 0.0, "distillation_loss": 0.023902807384729385, "epoch": 7.63, "learning_rate": 4.9029191414552165e-05, "loss": 0.0294, "step": 8032, "task_loss": 0.07919642329216003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999899236527307, "compression_loss": 0.0, "distillation_loss": 0.03757765516638756, "epoch": 7.63, "learning_rate": 4.9026248756185445e-05, "loss": 0.0356, "step": 8033, "task_loss": 0.017600098624825478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999900005710788, "compression_loss": 0.0, "distillation_loss": 0.08396127820014954, "epoch": 7.63, "learning_rate": 4.902330173333492e-05, "loss": 0.0825, "step": 8034, "task_loss": 0.06926584243774414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999900770969873, "compression_loss": 0.0, "distillation_loss": 0.03239300101995468, "epoch": 7.63, "learning_rate": 4.9020350346535936e-05, "loss": 0.0354, "step": 8035, "task_loss": 0.062150366604328156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999901532314597, "compression_loss": 0.0, "distillation_loss": 0.0414559543132782, "epoch": 7.63, "learning_rate": 4.901739459632463e-05, "loss": 0.0448, "step": 8036, "task_loss": 0.07490754127502441 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999902289754999, "compression_loss": 0.0, "distillation_loss": 0.03927509859204292, "epoch": 7.63, "learning_rate": 4.901443448323792e-05, "loss": 0.0396, "step": 8037, "task_loss": 0.04217911139130592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999903043301113, "compression_loss": 0.0, "distillation_loss": 0.10875845700502396, "epoch": 7.63, "learning_rate": 4.901147000781355e-05, "loss": 0.1031, "step": 8038, "task_loss": 0.05257695913314819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999903792962978, "compression_loss": 0.0, "distillation_loss": 0.18646162748336792, "epoch": 7.63, "learning_rate": 4.9008501170589996e-05, "loss": 0.1765, "step": 8039, "task_loss": 0.08664773404598236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799990453875063, "compression_loss": 0.0, "distillation_loss": 0.13723516464233398, "epoch": 7.64, "learning_rate": 4.900552797210658e-05, "loss": 0.1398, "step": 8040, "task_loss": 0.16254064440727234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999905280674106, "compression_loss": 0.0, "distillation_loss": 0.05691935867071152, "epoch": 7.64, "learning_rate": 4.90025504129034e-05, "loss": 0.0737, "step": 8041, "task_loss": 0.22483864426612854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999906018743442, "compression_loss": 0.0, "distillation_loss": 0.017723996192216873, "epoch": 7.64, "learning_rate": 4.8999568493521345e-05, "loss": 0.0164, "step": 8042, "task_loss": 0.004084032028913498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999906752968677, "compression_loss": 0.0, "distillation_loss": 0.07552896440029144, "epoch": 7.64, "learning_rate": 4.899658221450208e-05, "loss": 0.0805, "step": 8043, "task_loss": 0.1257321536540985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999907483359846, "compression_loss": 0.0, "distillation_loss": 0.10802210122346878, "epoch": 7.64, "learning_rate": 4.899359157638809e-05, "loss": 0.1099, "step": 8044, "task_loss": 0.12680913507938385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999908209926986, "compression_loss": 0.0, "distillation_loss": 0.12618671357631683, "epoch": 7.64, "learning_rate": 4.899059657972264e-05, "loss": 0.1189, "step": 8045, "task_loss": 0.0529848150908947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999908932680134, "compression_loss": 0.0, "distillation_loss": 0.027106711640954018, "epoch": 7.64, "learning_rate": 4.898759722504977e-05, "loss": 0.0254, "step": 8046, "task_loss": 0.010013708844780922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999909651629327, "compression_loss": 0.0, "distillation_loss": 0.02689509466290474, "epoch": 7.64, "learning_rate": 4.8984593512914356e-05, "loss": 0.0332, "step": 8047, "task_loss": 0.08948872238397598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999910366784602, "compression_loss": 0.0, "distillation_loss": 0.19355759024620056, "epoch": 7.64, "learning_rate": 4.898158544386201e-05, "loss": 0.1822, "step": 8048, "task_loss": 0.08036290854215622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999911078155995, "compression_loss": 0.0, "distillation_loss": 0.029629534110426903, "epoch": 7.64, "learning_rate": 4.897857301843917e-05, "loss": 0.0298, "step": 8049, "task_loss": 0.030841048806905746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999911785753543, "compression_loss": 0.0, "distillation_loss": 0.12482485175132751, "epoch": 7.64, "learning_rate": 4.897555623719306e-05, "loss": 0.129, "step": 8050, "task_loss": 0.1669907122850418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999912489587283, "compression_loss": 0.0, "distillation_loss": 0.11259239912033081, "epoch": 7.65, "learning_rate": 4.897253510067169e-05, "loss": 0.1228, "step": 8051, "task_loss": 0.21496890485286713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999913189667253, "compression_loss": 0.0, "distillation_loss": 0.10157469660043716, "epoch": 7.65, "learning_rate": 4.896950960942387e-05, "loss": 0.1013, "step": 8052, "task_loss": 0.09880008548498154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999913886003488, "compression_loss": 0.0, "distillation_loss": 0.06962237507104874, "epoch": 7.65, "learning_rate": 4.896647976399919e-05, "loss": 0.0682, "step": 8053, "task_loss": 0.05498020350933075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999914578606026, "compression_loss": 0.0, "distillation_loss": 0.02622218243777752, "epoch": 7.65, "learning_rate": 4.896344556494804e-05, "loss": 0.0248, "step": 8054, "task_loss": 0.011739548295736313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999915267484903, "compression_loss": 0.0, "distillation_loss": 0.05761996656656265, "epoch": 7.65, "learning_rate": 4.8960407012821584e-05, "loss": 0.0619, "step": 8055, "task_loss": 0.10011887550354004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999915952650157, "compression_loss": 0.0, "distillation_loss": 0.028014160692691803, "epoch": 7.65, "learning_rate": 4.895736410817181e-05, "loss": 0.026, "step": 8056, "task_loss": 0.007695335894823074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999916634111823, "compression_loss": 0.0, "distillation_loss": 0.018892180174589157, "epoch": 7.65, "learning_rate": 4.8954316851551465e-05, "loss": 0.0177, "step": 8057, "task_loss": 0.006759140640497208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799991731187994, "compression_loss": 0.0, "distillation_loss": 0.13491389155387878, "epoch": 7.65, "learning_rate": 4.895126524351409e-05, "loss": 0.1503, "step": 8058, "task_loss": 0.2890867292881012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999917985964543, "compression_loss": 0.0, "distillation_loss": 0.04141715168952942, "epoch": 7.65, "learning_rate": 4.8948209284614046e-05, "loss": 0.039, "step": 8059, "task_loss": 0.01771230250597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999918656375671, "compression_loss": 0.0, "distillation_loss": 0.016739333048462868, "epoch": 7.65, "learning_rate": 4.894514897540643e-05, "loss": 0.0158, "step": 8060, "task_loss": 0.007118521258234978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999919323123358, "compression_loss": 0.0, "distillation_loss": 0.01123537216335535, "epoch": 7.66, "learning_rate": 4.89420843164472e-05, "loss": 0.0202, "step": 8061, "task_loss": 0.10039521753787994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999919986217642, "compression_loss": 0.0, "distillation_loss": 0.019477343186736107, "epoch": 7.66, "learning_rate": 4.893901530829304e-05, "loss": 0.0179, "step": 8062, "task_loss": 0.0040080491453409195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799992064566856, "compression_loss": 0.0, "distillation_loss": 0.04232557862997055, "epoch": 7.66, "learning_rate": 4.8935941951501463e-05, "loss": 0.0474, "step": 8063, "task_loss": 0.0932355746626854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999921301486149, "compression_loss": 0.0, "distillation_loss": 0.10348470509052277, "epoch": 7.66, "learning_rate": 4.893286424663075e-05, "loss": 0.0977, "step": 8064, "task_loss": 0.045336902141571045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999921953680447, "compression_loss": 0.0, "distillation_loss": 0.01743854209780693, "epoch": 7.66, "learning_rate": 4.892978219423998e-05, "loss": 0.0331, "step": 8065, "task_loss": 0.17381621897220612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999922602261488, "compression_loss": 0.0, "distillation_loss": 0.01273958757519722, "epoch": 7.66, "learning_rate": 4.892669579488903e-05, "loss": 0.0186, "step": 8066, "task_loss": 0.07183681428432465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799992324723931, "compression_loss": 0.0, "distillation_loss": 0.11235454678535461, "epoch": 7.66, "learning_rate": 4.892360504913856e-05, "loss": 0.1105, "step": 8067, "task_loss": 0.09365284442901611 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999923888623951, "compression_loss": 0.0, "distillation_loss": 0.05907963961362839, "epoch": 7.66, "learning_rate": 4.8920509957550016e-05, "loss": 0.054, "step": 8068, "task_loss": 0.008163506165146828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999924526425447, "compression_loss": 0.0, "distillation_loss": 0.023158248513936996, "epoch": 7.66, "learning_rate": 4.8917410520685635e-05, "loss": 0.0351, "step": 8069, "task_loss": 0.14244556427001953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999925160653834, "compression_loss": 0.0, "distillation_loss": 0.016128631308674812, "epoch": 7.66, "learning_rate": 4.891430673910844e-05, "loss": 0.015, "step": 8070, "task_loss": 0.0045019593089818954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799992579131915, "compression_loss": 0.0, "distillation_loss": 0.023808129131793976, "epoch": 7.66, "learning_rate": 4.891119861338226e-05, "loss": 0.022, "step": 8071, "task_loss": 0.005267852917313576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999926418431431, "compression_loss": 0.0, "distillation_loss": 0.02601657807826996, "epoch": 7.67, "learning_rate": 4.8908086144071694e-05, "loss": 0.0241, "step": 8072, "task_loss": 0.0071242451667785645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999927042000715, "compression_loss": 0.0, "distillation_loss": 0.045584745705127716, "epoch": 7.67, "learning_rate": 4.8904969331742136e-05, "loss": 0.0496, "step": 8073, "task_loss": 0.08568625897169113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999927662037037, "compression_loss": 0.0, "distillation_loss": 0.021556910127401352, "epoch": 7.67, "learning_rate": 4.890184817695976e-05, "loss": 0.0281, "step": 8074, "task_loss": 0.08685538172721863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999928278550436, "compression_loss": 0.0, "distillation_loss": 0.09852443635463715, "epoch": 7.67, "learning_rate": 4.8898722680291564e-05, "loss": 0.1017, "step": 8075, "task_loss": 0.13034701347351074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999928891550947, "compression_loss": 0.0, "distillation_loss": 0.034188371151685715, "epoch": 7.67, "learning_rate": 4.8895592842305295e-05, "loss": 0.0401, "step": 8076, "task_loss": 0.09356120228767395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999929501048607, "compression_loss": 0.0, "distillation_loss": 0.02725556679069996, "epoch": 7.67, "learning_rate": 4.88924586635695e-05, "loss": 0.0348, "step": 8077, "task_loss": 0.1023518443107605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999930107053455, "compression_loss": 0.0, "distillation_loss": 0.0407828688621521, "epoch": 7.67, "learning_rate": 4.888932014465352e-05, "loss": 0.0427, "step": 8078, "task_loss": 0.060073304921388626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999930709575525, "compression_loss": 0.0, "distillation_loss": 0.02005520462989807, "epoch": 7.67, "learning_rate": 4.888617728612749e-05, "loss": 0.0187, "step": 8079, "task_loss": 0.006268629804253578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999931308624855, "compression_loss": 0.0, "distillation_loss": 0.016719898208975792, "epoch": 7.67, "learning_rate": 4.888303008856231e-05, "loss": 0.0154, "step": 8080, "task_loss": 0.0033553019165992737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999931904211481, "compression_loss": 0.0, "distillation_loss": 0.01692567765712738, "epoch": 7.67, "learning_rate": 4.88798785525297e-05, "loss": 0.0213, "step": 8081, "task_loss": 0.060581427067518234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999932496345442, "compression_loss": 0.0, "distillation_loss": 0.03452851623296738, "epoch": 7.68, "learning_rate": 4.887672267860214e-05, "loss": 0.0371, "step": 8082, "task_loss": 0.059820882976055145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999933085036772, "compression_loss": 0.0, "distillation_loss": 0.022003114223480225, "epoch": 7.68, "learning_rate": 4.887356246735292e-05, "loss": 0.0203, "step": 8083, "task_loss": 0.004899036139249802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799993367029551, "compression_loss": 0.0, "distillation_loss": 0.07817333936691284, "epoch": 7.68, "learning_rate": 4.8870397919356094e-05, "loss": 0.0767, "step": 8084, "task_loss": 0.0634901374578476 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999934252131693, "compression_loss": 0.0, "distillation_loss": 0.08945944160223007, "epoch": 7.68, "learning_rate": 4.8867229035186526e-05, "loss": 0.0865, "step": 8085, "task_loss": 0.060247667133808136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999934830555356, "compression_loss": 0.0, "distillation_loss": 0.016880009323358536, "epoch": 7.68, "learning_rate": 4.886405581541986e-05, "loss": 0.0156, "step": 8086, "task_loss": 0.0044879671186208725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999935405576537, "compression_loss": 0.0, "distillation_loss": 0.02322390116751194, "epoch": 7.68, "learning_rate": 4.886087826063252e-05, "loss": 0.0216, "step": 8087, "task_loss": 0.006913455203175545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999935977205271, "compression_loss": 0.0, "distillation_loss": 0.055455997586250305, "epoch": 7.68, "learning_rate": 4.8857696371401735e-05, "loss": 0.0505, "step": 8088, "task_loss": 0.00570225715637207 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999936545451598, "compression_loss": 0.0, "distillation_loss": 0.28629836440086365, "epoch": 7.68, "learning_rate": 4.88545101483055e-05, "loss": 0.2723, "step": 8089, "task_loss": 0.14600038528442383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999937110325553, "compression_loss": 0.0, "distillation_loss": 0.04520022124052048, "epoch": 7.68, "learning_rate": 4.885131959192262e-05, "loss": 0.0451, "step": 8090, "task_loss": 0.0442010760307312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999937671837173, "compression_loss": 0.0, "distillation_loss": 0.0367523655295372, "epoch": 7.68, "learning_rate": 4.884812470283265e-05, "loss": 0.0401, "step": 8091, "task_loss": 0.07056266069412231 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999938229996495, "compression_loss": 0.0, "distillation_loss": 0.02565024048089981, "epoch": 7.68, "learning_rate": 4.884492548161599e-05, "loss": 0.0237, "step": 8092, "task_loss": 0.005736216902732849 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999938784813555, "compression_loss": 0.0, "distillation_loss": 0.14084674417972565, "epoch": 7.69, "learning_rate": 4.8841721928853776e-05, "loss": 0.142, "step": 8093, "task_loss": 0.15224382281303406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999939336298391, "compression_loss": 0.0, "distillation_loss": 0.04901870712637901, "epoch": 7.69, "learning_rate": 4.8838514045127945e-05, "loss": 0.0518, "step": 8094, "task_loss": 0.07676392048597336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999939884461038, "compression_loss": 0.0, "distillation_loss": 0.07762657105922699, "epoch": 7.69, "learning_rate": 4.883530183102123e-05, "loss": 0.0828, "step": 8095, "task_loss": 0.1296282708644867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999940429311535, "compression_loss": 0.0, "distillation_loss": 0.060121599584817886, "epoch": 7.69, "learning_rate": 4.883208528711715e-05, "loss": 0.0675, "step": 8096, "task_loss": 0.13351008296012878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999940970859918, "compression_loss": 0.0, "distillation_loss": 0.07687482237815857, "epoch": 7.69, "learning_rate": 4.8828864413999995e-05, "loss": 0.0771, "step": 8097, "task_loss": 0.07926255464553833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999941509116224, "compression_loss": 0.0, "distillation_loss": 0.06490933150053024, "epoch": 7.69, "learning_rate": 4.8825639212254865e-05, "loss": 0.0652, "step": 8098, "task_loss": 0.0676705464720726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999942044090489, "compression_loss": 0.0, "distillation_loss": 0.03710645064711571, "epoch": 7.69, "learning_rate": 4.882240968246762e-05, "loss": 0.0375, "step": 8099, "task_loss": 0.04076388105750084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999942575792751, "compression_loss": 0.0, "distillation_loss": 0.02674899250268936, "epoch": 7.69, "learning_rate": 4.8819175825224925e-05, "loss": 0.0249, "step": 8100, "task_loss": 0.0087125264108181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999943104233046, "compression_loss": 0.0, "distillation_loss": 0.06944914162158966, "epoch": 7.69, "learning_rate": 4.881593764111424e-05, "loss": 0.0701, "step": 8101, "task_loss": 0.07624385505914688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799994362942141, "compression_loss": 0.0, "distillation_loss": 0.015938565135002136, "epoch": 7.69, "learning_rate": 4.8812695130723775e-05, "loss": 0.0148, "step": 8102, "task_loss": 0.004970138892531395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999944151367882, "compression_loss": 0.0, "distillation_loss": 0.07653353363275528, "epoch": 7.7, "learning_rate": 4.880944829464256e-05, "loss": 0.0844, "step": 8103, "task_loss": 0.155586376786232 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999944670082497, "compression_loss": 0.0, "distillation_loss": 0.05173022672533989, "epoch": 7.7, "learning_rate": 4.880619713346039e-05, "loss": 0.0512, "step": 8104, "task_loss": 0.045994266867637634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999945185575293, "compression_loss": 0.0, "distillation_loss": 0.04804886877536774, "epoch": 7.7, "learning_rate": 4.8802941647767856e-05, "loss": 0.0604, "step": 8105, "task_loss": 0.1717759370803833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999945697856307, "compression_loss": 0.0, "distillation_loss": 0.08852384239435196, "epoch": 7.7, "learning_rate": 4.879968183815634e-05, "loss": 0.0908, "step": 8106, "task_loss": 0.11161627620458603 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999946206935573, "compression_loss": 0.0, "distillation_loss": 0.11721711605787277, "epoch": 7.7, "learning_rate": 4.8796417705217994e-05, "loss": 0.1135, "step": 8107, "task_loss": 0.07980488240718842 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999946712823132, "compression_loss": 0.0, "distillation_loss": 0.14771784842014313, "epoch": 7.7, "learning_rate": 4.879314924954577e-05, "loss": 0.1429, "step": 8108, "task_loss": 0.09997382760047913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999947215529017, "compression_loss": 0.0, "distillation_loss": 0.07733377814292908, "epoch": 7.7, "learning_rate": 4.87898764717334e-05, "loss": 0.0812, "step": 8109, "task_loss": 0.1161360889673233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999947715063268, "compression_loss": 0.0, "distillation_loss": 0.06381769478321075, "epoch": 7.7, "learning_rate": 4.8786599372375384e-05, "loss": 0.0612, "step": 8110, "task_loss": 0.03766363486647606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799994821143592, "compression_loss": 0.0, "distillation_loss": 0.16585132479667664, "epoch": 7.7, "learning_rate": 4.878331795206705e-05, "loss": 0.1596, "step": 8111, "task_loss": 0.10337799787521362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799994870465701, "compression_loss": 0.0, "distillation_loss": 0.07819118350744247, "epoch": 7.7, "learning_rate": 4.878003221140446e-05, "loss": 0.0886, "step": 8112, "task_loss": 0.18247146904468536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999949194736575, "compression_loss": 0.0, "distillation_loss": 0.014311755076050758, "epoch": 7.7, "learning_rate": 4.877674215098449e-05, "loss": 0.0136, "step": 8113, "task_loss": 0.007542043924331665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999949681684653, "compression_loss": 0.0, "distillation_loss": 0.02407708764076233, "epoch": 7.71, "learning_rate": 4.87734477714048e-05, "loss": 0.0321, "step": 8114, "task_loss": 0.10392209887504578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999950165511278, "compression_loss": 0.0, "distillation_loss": 0.0483178049325943, "epoch": 7.71, "learning_rate": 4.8770149073263833e-05, "loss": 0.0544, "step": 8115, "task_loss": 0.10924448817968369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999950646226489, "compression_loss": 0.0, "distillation_loss": 0.06605200469493866, "epoch": 7.71, "learning_rate": 4.87668460571608e-05, "loss": 0.0702, "step": 8116, "task_loss": 0.10719858109951019 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999951123840323, "compression_loss": 0.0, "distillation_loss": 0.12172948569059372, "epoch": 7.71, "learning_rate": 4.8763538723695726e-05, "loss": 0.132, "step": 8117, "task_loss": 0.22487123310565948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999951598362816, "compression_loss": 0.0, "distillation_loss": 0.037446945905685425, "epoch": 7.71, "learning_rate": 4.87602270734694e-05, "loss": 0.0366, "step": 8118, "task_loss": 0.028905699029564857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999952069804004, "compression_loss": 0.0, "distillation_loss": 0.02980622835457325, "epoch": 7.71, "learning_rate": 4.8756911107083387e-05, "loss": 0.0321, "step": 8119, "task_loss": 0.05290570110082626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999952538173926, "compression_loss": 0.0, "distillation_loss": 0.02518196403980255, "epoch": 7.71, "learning_rate": 4.875359082514006e-05, "loss": 0.0235, "step": 8120, "task_loss": 0.008489791303873062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999953003482617, "compression_loss": 0.0, "distillation_loss": 0.019510727375745773, "epoch": 7.71, "learning_rate": 4.8750266228242555e-05, "loss": 0.0322, "step": 8121, "task_loss": 0.14628659188747406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999953465740115, "compression_loss": 0.0, "distillation_loss": 0.0406201109290123, "epoch": 7.71, "learning_rate": 4.874693731699481e-05, "loss": 0.0407, "step": 8122, "task_loss": 0.04184962809085846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999953924956456, "compression_loss": 0.0, "distillation_loss": 0.03238911181688309, "epoch": 7.71, "learning_rate": 4.8743604092001544e-05, "loss": 0.0395, "step": 8123, "task_loss": 0.10380247235298157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999954381141676, "compression_loss": 0.0, "distillation_loss": 0.08192041516304016, "epoch": 7.72, "learning_rate": 4.8740266553868236e-05, "loss": 0.0867, "step": 8124, "task_loss": 0.12997817993164062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999954834305814, "compression_loss": 0.0, "distillation_loss": 0.040653470903635025, "epoch": 7.72, "learning_rate": 4.873692470320117e-05, "loss": 0.0513, "step": 8125, "task_loss": 0.14687559008598328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999955284458905, "compression_loss": 0.0, "distillation_loss": 0.07809877395629883, "epoch": 7.72, "learning_rate": 4.8733578540607425e-05, "loss": 0.0812, "step": 8126, "task_loss": 0.10933775454759598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999955731610988, "compression_loss": 0.0, "distillation_loss": 0.0384988859295845, "epoch": 7.72, "learning_rate": 4.8730228066694825e-05, "loss": 0.0491, "step": 8127, "task_loss": 0.1440470814704895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999956175772097, "compression_loss": 0.0, "distillation_loss": 0.015554778277873993, "epoch": 7.72, "learning_rate": 4.872687328207202e-05, "loss": 0.0146, "step": 8128, "task_loss": 0.006275909021496773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999956616952271, "compression_loss": 0.0, "distillation_loss": 0.013756733387708664, "epoch": 7.72, "learning_rate": 4.872351418734841e-05, "loss": 0.0128, "step": 8129, "task_loss": 0.004298551008105278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999957055161545, "compression_loss": 0.0, "distillation_loss": 0.037815727293491364, "epoch": 7.72, "learning_rate": 4.8720150783134196e-05, "loss": 0.0426, "step": 8130, "task_loss": 0.08522268384695053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999957490409958, "compression_loss": 0.0, "distillation_loss": 0.0777861624956131, "epoch": 7.72, "learning_rate": 4.871678307004035e-05, "loss": 0.0729, "step": 8131, "task_loss": 0.0290694423019886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999957922707545, "compression_loss": 0.0, "distillation_loss": 0.021506596356630325, "epoch": 7.72, "learning_rate": 4.8713411048678635e-05, "loss": 0.0198, "step": 8132, "task_loss": 0.004600740969181061 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999958352064344, "compression_loss": 0.0, "distillation_loss": 0.13485758006572723, "epoch": 7.72, "learning_rate": 4.8710034719661614e-05, "loss": 0.1337, "step": 8133, "task_loss": 0.1231408417224884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799995877849039, "compression_loss": 0.0, "distillation_loss": 0.014092806726694107, "epoch": 7.72, "learning_rate": 4.870665408360258e-05, "loss": 0.0216, "step": 8134, "task_loss": 0.08945970237255096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999959201995722, "compression_loss": 0.0, "distillation_loss": 0.04725116491317749, "epoch": 7.73, "learning_rate": 4.870326914111567e-05, "loss": 0.044, "step": 8135, "task_loss": 0.014543771743774414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999959622590376, "compression_loss": 0.0, "distillation_loss": 0.09922022372484207, "epoch": 7.73, "learning_rate": 4.8699879892815756e-05, "loss": 0.09, "step": 8136, "task_loss": 0.007207704707980156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999960040284388, "compression_loss": 0.0, "distillation_loss": 0.062455762177705765, "epoch": 7.73, "learning_rate": 4.8696486339318524e-05, "loss": 0.0625, "step": 8137, "task_loss": 0.06264299154281616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999960455087797, "compression_loss": 0.0, "distillation_loss": 0.08884945511817932, "epoch": 7.73, "learning_rate": 4.8693088481240424e-05, "loss": 0.0883, "step": 8138, "task_loss": 0.0832391083240509 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999960867010637, "compression_loss": 0.0, "distillation_loss": 0.03622208908200264, "epoch": 7.73, "learning_rate": 4.86896863191987e-05, "loss": 0.0334, "step": 8139, "task_loss": 0.007611839100718498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999961276062947, "compression_loss": 0.0, "distillation_loss": 0.05895059183239937, "epoch": 7.73, "learning_rate": 4.8686279853811356e-05, "loss": 0.0593, "step": 8140, "task_loss": 0.06226490065455437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999961682254764, "compression_loss": 0.0, "distillation_loss": 0.03609544038772583, "epoch": 7.73, "learning_rate": 4.8682869085697206e-05, "loss": 0.0412, "step": 8141, "task_loss": 0.0870952308177948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999962085596122, "compression_loss": 0.0, "distillation_loss": 0.09176631271839142, "epoch": 7.73, "learning_rate": 4.8679454015475835e-05, "loss": 0.0958, "step": 8142, "task_loss": 0.13180381059646606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999962486097061, "compression_loss": 0.0, "distillation_loss": 0.15721860527992249, "epoch": 7.73, "learning_rate": 4.867603464376759e-05, "loss": 0.1509, "step": 8143, "task_loss": 0.09391912072896957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999962883767615, "compression_loss": 0.0, "distillation_loss": 0.020519878715276718, "epoch": 7.73, "learning_rate": 4.867261097119363e-05, "loss": 0.0241, "step": 8144, "task_loss": 0.0560954324901104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999963278617823, "compression_loss": 0.0, "distillation_loss": 0.10751358419656754, "epoch": 7.74, "learning_rate": 4.8669182998375884e-05, "loss": 0.1109, "step": 8145, "task_loss": 0.14114311337471008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999963670657722, "compression_loss": 0.0, "distillation_loss": 0.06851037591695786, "epoch": 7.74, "learning_rate": 4.8665750725937045e-05, "loss": 0.0684, "step": 8146, "task_loss": 0.06784656643867493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999964059897348, "compression_loss": 0.0, "distillation_loss": 0.0376189686357975, "epoch": 7.74, "learning_rate": 4.866231415450062e-05, "loss": 0.0377, "step": 8147, "task_loss": 0.03808595612645149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999964446346736, "compression_loss": 0.0, "distillation_loss": 0.03249119967222214, "epoch": 7.74, "learning_rate": 4.8658873284690866e-05, "loss": 0.0449, "step": 8148, "task_loss": 0.15644009411334991 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999964830015925, "compression_loss": 0.0, "distillation_loss": 0.04549805074930191, "epoch": 7.74, "learning_rate": 4.865542811713284e-05, "loss": 0.0521, "step": 8149, "task_loss": 0.11196024715900421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999965210914953, "compression_loss": 0.0, "distillation_loss": 0.07704181969165802, "epoch": 7.74, "learning_rate": 4.865197865245237e-05, "loss": 0.0857, "step": 8150, "task_loss": 0.16380248963832855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999965589053855, "compression_loss": 0.0, "distillation_loss": 0.06383125483989716, "epoch": 7.74, "learning_rate": 4.8648524891276066e-05, "loss": 0.0624, "step": 8151, "task_loss": 0.049697332084178925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999965964442667, "compression_loss": 0.0, "distillation_loss": 0.10283863544464111, "epoch": 7.74, "learning_rate": 4.8645066834231325e-05, "loss": 0.1101, "step": 8152, "task_loss": 0.17590855062007904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999966337091426, "compression_loss": 0.0, "distillation_loss": 0.09706176817417145, "epoch": 7.74, "learning_rate": 4.8641604481946314e-05, "loss": 0.1095, "step": 8153, "task_loss": 0.22108034789562225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999966707010172, "compression_loss": 0.0, "distillation_loss": 0.08932052552700043, "epoch": 7.74, "learning_rate": 4.863813783504999e-05, "loss": 0.0871, "step": 8154, "task_loss": 0.0668034479022026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999967074208939, "compression_loss": 0.0, "distillation_loss": 0.022766336798667908, "epoch": 7.74, "learning_rate": 4.863466689417209e-05, "loss": 0.0263, "step": 8155, "task_loss": 0.058080703020095825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999967438697764, "compression_loss": 0.0, "distillation_loss": 0.04971291124820709, "epoch": 7.75, "learning_rate": 4.863119165994312e-05, "loss": 0.0537, "step": 8156, "task_loss": 0.08949106186628342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999967800486684, "compression_loss": 0.0, "distillation_loss": 0.04288540780544281, "epoch": 7.75, "learning_rate": 4.862771213299438e-05, "loss": 0.039, "step": 8157, "task_loss": 0.004233557730913162 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999968159585735, "compression_loss": 0.0, "distillation_loss": 0.01929197832942009, "epoch": 7.75, "learning_rate": 4.8624228313957937e-05, "loss": 0.0265, "step": 8158, "task_loss": 0.09141400456428528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999968516004956, "compression_loss": 0.0, "distillation_loss": 0.04137708246707916, "epoch": 7.75, "learning_rate": 4.862074020346664e-05, "loss": 0.0378, "step": 8159, "task_loss": 0.0055605582892894745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999968869754382, "compression_loss": 0.0, "distillation_loss": 0.01853022351861, "epoch": 7.75, "learning_rate": 4.8617247802154134e-05, "loss": 0.0252, "step": 8160, "task_loss": 0.085117407143116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999969220844052, "compression_loss": 0.0, "distillation_loss": 0.07992290705442429, "epoch": 7.75, "learning_rate": 4.861375111065482e-05, "loss": 0.0841, "step": 8161, "task_loss": 0.12121039628982544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999969569284, "compression_loss": 0.0, "distillation_loss": 0.02700880542397499, "epoch": 7.75, "learning_rate": 4.861025012960389e-05, "loss": 0.0271, "step": 8162, "task_loss": 0.027840938419103622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999969915084264, "compression_loss": 0.0, "distillation_loss": 0.04433220624923706, "epoch": 7.75, "learning_rate": 4.8606744859637316e-05, "loss": 0.0487, "step": 8163, "task_loss": 0.08848340809345245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999970258254882, "compression_loss": 0.0, "distillation_loss": 0.022559959441423416, "epoch": 7.75, "learning_rate": 4.8603235301391844e-05, "loss": 0.0306, "step": 8164, "task_loss": 0.10257212072610855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999970598805889, "compression_loss": 0.0, "distillation_loss": 0.020599160343408585, "epoch": 7.75, "learning_rate": 4.859972145550501e-05, "loss": 0.0287, "step": 8165, "task_loss": 0.10190172493457794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999970936747323, "compression_loss": 0.0, "distillation_loss": 0.044832177460193634, "epoch": 7.75, "learning_rate": 4.859620332261512e-05, "loss": 0.0469, "step": 8166, "task_loss": 0.0659225732088089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999971272089219, "compression_loss": 0.0, "distillation_loss": 0.0355226993560791, "epoch": 7.76, "learning_rate": 4.8592680903361247e-05, "loss": 0.0403, "step": 8167, "task_loss": 0.08283548802137375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999971604841616, "compression_loss": 0.0, "distillation_loss": 0.06762556731700897, "epoch": 7.76, "learning_rate": 4.858915419838327e-05, "loss": 0.0614, "step": 8168, "task_loss": 0.005646536126732826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799997193501455, "compression_loss": 0.0, "distillation_loss": 0.02109435945749283, "epoch": 7.76, "learning_rate": 4.8585623208321825e-05, "loss": 0.0389, "step": 8169, "task_loss": 0.19891361892223358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999972262618058, "compression_loss": 0.0, "distillation_loss": 0.029250595718622208, "epoch": 7.76, "learning_rate": 4.858208793381833e-05, "loss": 0.0603, "step": 8170, "task_loss": 0.3400185704231262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999972587662176, "compression_loss": 0.0, "distillation_loss": 0.10609747469425201, "epoch": 7.76, "learning_rate": 4.8578548375514995e-05, "loss": 0.0997, "step": 8171, "task_loss": 0.042343318462371826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999972910156943, "compression_loss": 0.0, "distillation_loss": 0.03819608315825462, "epoch": 7.76, "learning_rate": 4.8575004534054794e-05, "loss": 0.036, "step": 8172, "task_loss": 0.016501694917678833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999973230112394, "compression_loss": 0.0, "distillation_loss": 0.020714174956083298, "epoch": 7.76, "learning_rate": 4.8571456410081474e-05, "loss": 0.0334, "step": 8173, "task_loss": 0.14768551290035248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999973547538565, "compression_loss": 0.0, "distillation_loss": 0.04434049874544144, "epoch": 7.76, "learning_rate": 4.856790400423958e-05, "loss": 0.0539, "step": 8174, "task_loss": 0.13957616686820984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999973862445494, "compression_loss": 0.0, "distillation_loss": 0.024984436109662056, "epoch": 7.76, "learning_rate": 4.856434731717442e-05, "loss": 0.0395, "step": 8175, "task_loss": 0.1703425943851471 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999974174843218, "compression_loss": 0.0, "distillation_loss": 0.013447124511003494, "epoch": 7.76, "learning_rate": 4.8560786349532075e-05, "loss": 0.0169, "step": 8176, "task_loss": 0.04839882627129555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999974484741774, "compression_loss": 0.0, "distillation_loss": 0.04491272196173668, "epoch": 7.77, "learning_rate": 4.855722110195943e-05, "loss": 0.0685, "step": 8177, "task_loss": 0.28108105063438416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999974792151199, "compression_loss": 0.0, "distillation_loss": 0.022170700132846832, "epoch": 7.77, "learning_rate": 4.8553651575104114e-05, "loss": 0.033, "step": 8178, "task_loss": 0.130245178937912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999975097081529, "compression_loss": 0.0, "distillation_loss": 0.0931999534368515, "epoch": 7.77, "learning_rate": 4.8550077769614554e-05, "loss": 0.0971, "step": 8179, "task_loss": 0.1317388266324997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79999753995428, "compression_loss": 0.0, "distillation_loss": 0.019387193024158478, "epoch": 7.77, "learning_rate": 4.8546499686139944e-05, "loss": 0.0178, "step": 8180, "task_loss": 0.0039298199117183685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799997569954505, "compression_loss": 0.0, "distillation_loss": 0.014103731140494347, "epoch": 7.77, "learning_rate": 4.854291732533027e-05, "loss": 0.0132, "step": 8181, "task_loss": 0.005378361791372299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999975997098316, "compression_loss": 0.0, "distillation_loss": 0.019791752099990845, "epoch": 7.77, "learning_rate": 4.853933068783628e-05, "loss": 0.0279, "step": 8182, "task_loss": 0.10089607536792755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999976292212635, "compression_loss": 0.0, "distillation_loss": 0.08148065209388733, "epoch": 7.77, "learning_rate": 4.853573977430951e-05, "loss": 0.0862, "step": 8183, "task_loss": 0.128190815448761 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999976584898043, "compression_loss": 0.0, "distillation_loss": 0.045781854540109634, "epoch": 7.77, "learning_rate": 4.8532144585402254e-05, "loss": 0.0464, "step": 8184, "task_loss": 0.051745254546403885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999976875164577, "compression_loss": 0.0, "distillation_loss": 0.0476515106856823, "epoch": 7.77, "learning_rate": 4.85285451217676e-05, "loss": 0.0505, "step": 8185, "task_loss": 0.07574954628944397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999977163022274, "compression_loss": 0.0, "distillation_loss": 0.15082959830760956, "epoch": 7.77, "learning_rate": 4.8524941384059415e-05, "loss": 0.1472, "step": 8186, "task_loss": 0.11474103480577469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999977448481171, "compression_loss": 0.0, "distillation_loss": 0.016797035932540894, "epoch": 7.77, "learning_rate": 4.8521333372932326e-05, "loss": 0.0269, "step": 8187, "task_loss": 0.11737027764320374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999977731551304, "compression_loss": 0.0, "distillation_loss": 0.035971708595752716, "epoch": 7.78, "learning_rate": 4.851772108904175e-05, "loss": 0.0345, "step": 8188, "task_loss": 0.021439943462610245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999978012242711, "compression_loss": 0.0, "distillation_loss": 0.024865467101335526, "epoch": 7.78, "learning_rate": 4.851410453304388e-05, "loss": 0.0321, "step": 8189, "task_loss": 0.09683802723884583 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999978290565428, "compression_loss": 0.0, "distillation_loss": 0.02018059231340885, "epoch": 7.78, "learning_rate": 4.851048370559567e-05, "loss": 0.0277, "step": 8190, "task_loss": 0.09545273333787918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999978566529493, "compression_loss": 0.0, "distillation_loss": 0.032620202749967575, "epoch": 7.78, "learning_rate": 4.850685860735487e-05, "loss": 0.0374, "step": 8191, "task_loss": 0.08022205531597137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999978840144941, "compression_loss": 0.0, "distillation_loss": 0.03163286671042442, "epoch": 7.78, "learning_rate": 4.850322923898e-05, "loss": 0.0294, "step": 8192, "task_loss": 0.00926467590034008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799997911142181, "compression_loss": 0.0, "distillation_loss": 0.06667460501194, "epoch": 7.78, "learning_rate": 4.8499595601130337e-05, "loss": 0.0806, "step": 8193, "task_loss": 0.20638611912727356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999979380370137, "compression_loss": 0.0, "distillation_loss": 0.1302226334810257, "epoch": 7.78, "learning_rate": 4.849595769446596e-05, "loss": 0.1247, "step": 8194, "task_loss": 0.07523670792579651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999979646999957, "compression_loss": 0.0, "distillation_loss": 0.12275674939155579, "epoch": 7.78, "learning_rate": 4.849231551964771e-05, "loss": 0.1306, "step": 8195, "task_loss": 0.20129883289337158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999979911321309, "compression_loss": 0.0, "distillation_loss": 0.03874664753675461, "epoch": 7.78, "learning_rate": 4.848866907733721e-05, "loss": 0.0359, "step": 8196, "task_loss": 0.009907728061079979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999980173344229, "compression_loss": 0.0, "distillation_loss": 0.07732771337032318, "epoch": 7.78, "learning_rate": 4.848501836819684e-05, "loss": 0.0893, "step": 8197, "task_loss": 0.19742365181446075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999980433078754, "compression_loss": 0.0, "distillation_loss": 0.024056117981672287, "epoch": 7.79, "learning_rate": 4.848136339288979e-05, "loss": 0.0229, "step": 8198, "task_loss": 0.012888273224234581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799998069053492, "compression_loss": 0.0, "distillation_loss": 0.1355004906654358, "epoch": 7.79, "learning_rate": 4.8477704152079984e-05, "loss": 0.1258, "step": 8199, "task_loss": 0.03880665823817253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999980945722766, "compression_loss": 0.0, "distillation_loss": 0.09072038531303406, "epoch": 7.79, "learning_rate": 4.8474040646432153e-05, "loss": 0.0949, "step": 8200, "task_loss": 0.13292811810970306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999981198652326, "compression_loss": 0.0, "distillation_loss": 0.04228777810931206, "epoch": 7.79, "learning_rate": 4.8470372876611784e-05, "loss": 0.0399, "step": 8201, "task_loss": 0.0188329815864563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999981449333639, "compression_loss": 0.0, "distillation_loss": 0.05842405557632446, "epoch": 7.79, "learning_rate": 4.846670084328515e-05, "loss": 0.0558, "step": 8202, "task_loss": 0.03242020681500435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999981697776739, "compression_loss": 0.0, "distillation_loss": 0.10038132965564728, "epoch": 7.79, "learning_rate": 4.846302454711929e-05, "loss": 0.0992, "step": 8203, "task_loss": 0.08900587260723114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999981943991666, "compression_loss": 0.0, "distillation_loss": 0.06294578313827515, "epoch": 7.79, "learning_rate": 4.845934398878202e-05, "loss": 0.0766, "step": 8204, "task_loss": 0.19954392313957214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999982187988456, "compression_loss": 0.0, "distillation_loss": 0.015763722360134125, "epoch": 7.79, "learning_rate": 4.845565916894193e-05, "loss": 0.0244, "step": 8205, "task_loss": 0.10191762447357178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999982429777145, "compression_loss": 0.0, "distillation_loss": 0.04905875027179718, "epoch": 7.79, "learning_rate": 4.8451970088268396e-05, "loss": 0.0462, "step": 8206, "task_loss": 0.0201642494648695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799998266936777, "compression_loss": 0.0, "distillation_loss": 0.03759082779288292, "epoch": 7.79, "learning_rate": 4.8448276747431545e-05, "loss": 0.0349, "step": 8207, "task_loss": 0.010456247255206108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999982906770369, "compression_loss": 0.0, "distillation_loss": 0.01988394558429718, "epoch": 7.79, "learning_rate": 4.84445791471023e-05, "loss": 0.0286, "step": 8208, "task_loss": 0.10685549676418304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999983141994976, "compression_loss": 0.0, "distillation_loss": 0.1849537491798401, "epoch": 7.8, "learning_rate": 4.8440877287952336e-05, "loss": 0.1814, "step": 8209, "task_loss": 0.14974652230739594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999983375051631, "compression_loss": 0.0, "distillation_loss": 0.03731781989336014, "epoch": 7.8, "learning_rate": 4.8437171170654125e-05, "loss": 0.0406, "step": 8210, "task_loss": 0.07024012506008148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799998360595037, "compression_loss": 0.0, "distillation_loss": 0.027419112622737885, "epoch": 7.8, "learning_rate": 4.843346079588089e-05, "loss": 0.0254, "step": 8211, "task_loss": 0.00690159946680069 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999983834701229, "compression_loss": 0.0, "distillation_loss": 0.03823021054267883, "epoch": 7.8, "learning_rate": 4.842974616430665e-05, "loss": 0.0443, "step": 8212, "task_loss": 0.09891167283058167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999984061314245, "compression_loss": 0.0, "distillation_loss": 0.022461410611867905, "epoch": 7.8, "learning_rate": 4.842602727660618e-05, "loss": 0.0244, "step": 8213, "task_loss": 0.04227849468588829 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999984285799454, "compression_loss": 0.0, "distillation_loss": 0.06684540212154388, "epoch": 7.8, "learning_rate": 4.842230413345503e-05, "loss": 0.0692, "step": 8214, "task_loss": 0.09044404327869415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999984508166894, "compression_loss": 0.0, "distillation_loss": 0.041791971772909164, "epoch": 7.8, "learning_rate": 4.8418576735529535e-05, "loss": 0.0441, "step": 8215, "task_loss": 0.06479668617248535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999984728426602, "compression_loss": 0.0, "distillation_loss": 0.03619157522916794, "epoch": 7.8, "learning_rate": 4.841484508350679e-05, "loss": 0.0353, "step": 8216, "task_loss": 0.02702299691736698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999984946588615, "compression_loss": 0.0, "distillation_loss": 0.0437760166823864, "epoch": 7.8, "learning_rate": 4.841110917806467e-05, "loss": 0.0542, "step": 8217, "task_loss": 0.14790481328964233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999985162662969, "compression_loss": 0.0, "distillation_loss": 0.03585965931415558, "epoch": 7.8, "learning_rate": 4.840736901988182e-05, "loss": 0.0396, "step": 8218, "task_loss": 0.07364504784345627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999985376659701, "compression_loss": 0.0, "distillation_loss": 0.12400738894939423, "epoch": 7.81, "learning_rate": 4.840362460963765e-05, "loss": 0.1329, "step": 8219, "task_loss": 0.21300512552261353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999985588588847, "compression_loss": 0.0, "distillation_loss": 0.04792033135890961, "epoch": 7.81, "learning_rate": 4.8399875948012355e-05, "loss": 0.0518, "step": 8220, "task_loss": 0.0868147611618042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999985798460446, "compression_loss": 0.0, "distillation_loss": 0.04659024253487587, "epoch": 7.81, "learning_rate": 4.8396123035686906e-05, "loss": 0.0427, "step": 8221, "task_loss": 0.007754124701023102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999986006284533, "compression_loss": 0.0, "distillation_loss": 0.19927890598773956, "epoch": 7.81, "learning_rate": 4.839236587334303e-05, "loss": 0.2014, "step": 8222, "task_loss": 0.22048905491828918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999986212071145, "compression_loss": 0.0, "distillation_loss": 0.08156438916921616, "epoch": 7.81, "learning_rate": 4.8388604461663236e-05, "loss": 0.0814, "step": 8223, "task_loss": 0.08026103675365448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799998641583032, "compression_loss": 0.0, "distillation_loss": 0.03177830949425697, "epoch": 7.81, "learning_rate": 4.838483880133079e-05, "loss": 0.0392, "step": 8224, "task_loss": 0.10596133768558502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999986617572094, "compression_loss": 0.0, "distillation_loss": 0.23754796385765076, "epoch": 7.81, "learning_rate": 4.8381068893029766e-05, "loss": 0.2324, "step": 8225, "task_loss": 0.18612830340862274 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999986817306503, "compression_loss": 0.0, "distillation_loss": 0.04634622856974602, "epoch": 7.81, "learning_rate": 4.837729473744497e-05, "loss": 0.0424, "step": 8226, "task_loss": 0.0071801114827394485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999987015043585, "compression_loss": 0.0, "distillation_loss": 0.03712467849254608, "epoch": 7.81, "learning_rate": 4.8373516335261994e-05, "loss": 0.034, "step": 8227, "task_loss": 0.0057260747998952866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999987210793376, "compression_loss": 0.0, "distillation_loss": 0.038928285241127014, "epoch": 7.81, "learning_rate": 4.8369733687167204e-05, "loss": 0.0372, "step": 8228, "task_loss": 0.021699724718928337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999987404565914, "compression_loss": 0.0, "distillation_loss": 0.022505152970552444, "epoch": 7.81, "learning_rate": 4.836594679384775e-05, "loss": 0.0259, "step": 8229, "task_loss": 0.056766681373119354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999987596371234, "compression_loss": 0.0, "distillation_loss": 0.10139244794845581, "epoch": 7.82, "learning_rate": 4.836215565599152e-05, "loss": 0.096, "step": 8230, "task_loss": 0.0474693700671196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999987786219375, "compression_loss": 0.0, "distillation_loss": 0.14026379585266113, "epoch": 7.82, "learning_rate": 4.835836027428722e-05, "loss": 0.1379, "step": 8231, "task_loss": 0.11672936379909515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999987974120373, "compression_loss": 0.0, "distillation_loss": 0.10862313210964203, "epoch": 7.82, "learning_rate": 4.8354560649424264e-05, "loss": 0.1071, "step": 8232, "task_loss": 0.09331917762756348 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999988160084264, "compression_loss": 0.0, "distillation_loss": 0.01986592821776867, "epoch": 7.82, "learning_rate": 4.8350756782092894e-05, "loss": 0.026, "step": 8233, "task_loss": 0.08163642883300781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999988344121085, "compression_loss": 0.0, "distillation_loss": 0.05118180066347122, "epoch": 7.82, "learning_rate": 4.8346948672984096e-05, "loss": 0.0483, "step": 8234, "task_loss": 0.0220907311886549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999988526240873, "compression_loss": 0.0, "distillation_loss": 0.14736244082450867, "epoch": 7.82, "learning_rate": 4.8343136322789626e-05, "loss": 0.1584, "step": 8235, "task_loss": 0.2577117383480072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999988706453666, "compression_loss": 0.0, "distillation_loss": 0.1561247706413269, "epoch": 7.82, "learning_rate": 4.8339319732202024e-05, "loss": 0.1704, "step": 8236, "task_loss": 0.29920265078544617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79999888847695, "compression_loss": 0.0, "distillation_loss": 0.02194085717201233, "epoch": 7.82, "learning_rate": 4.83354989019146e-05, "loss": 0.0284, "step": 8237, "task_loss": 0.0863027274608612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799998906119841, "compression_loss": 0.0, "distillation_loss": 0.012062850408256054, "epoch": 7.82, "learning_rate": 4.83316738326214e-05, "loss": 0.0113, "step": 8238, "task_loss": 0.004339534789323807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999989235750437, "compression_loss": 0.0, "distillation_loss": 0.11506865918636322, "epoch": 7.82, "learning_rate": 4.832784452501729e-05, "loss": 0.1089, "step": 8239, "task_loss": 0.05316751450300217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999989408435614, "compression_loss": 0.0, "distillation_loss": 0.02399417757987976, "epoch": 7.83, "learning_rate": 4.8324010979797875e-05, "loss": 0.0228, "step": 8240, "task_loss": 0.012368382886052132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999989579263979, "compression_loss": 0.0, "distillation_loss": 0.08361876010894775, "epoch": 7.83, "learning_rate": 4.8320173197659534e-05, "loss": 0.0955, "step": 8241, "task_loss": 0.20286312699317932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999989748245568, "compression_loss": 0.0, "distillation_loss": 0.03970951959490776, "epoch": 7.83, "learning_rate": 4.831633117929942e-05, "loss": 0.0364, "step": 8242, "task_loss": 0.006229208782315254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799998991539042, "compression_loss": 0.0, "distillation_loss": 0.01506102830171585, "epoch": 7.83, "learning_rate": 4.831248492541545e-05, "loss": 0.0141, "step": 8243, "task_loss": 0.005801372230052948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999008070857, "compression_loss": 0.0, "distillation_loss": 0.040881797671318054, "epoch": 7.83, "learning_rate": 4.830863443670632e-05, "loss": 0.0392, "step": 8244, "task_loss": 0.023911267518997192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999990244210056, "compression_loss": 0.0, "distillation_loss": 0.01167452521622181, "epoch": 7.83, "learning_rate": 4.8304779713871495e-05, "loss": 0.0109, "step": 8245, "task_loss": 0.0038937367498874664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999990405904914, "compression_loss": 0.0, "distillation_loss": 0.15877902507781982, "epoch": 7.83, "learning_rate": 4.83009207576112e-05, "loss": 0.1789, "step": 8246, "task_loss": 0.3599551320075989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999990565803181, "compression_loss": 0.0, "distillation_loss": 0.03877423331141472, "epoch": 7.83, "learning_rate": 4.829705756862642e-05, "loss": 0.0432, "step": 8247, "task_loss": 0.08290007710456848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999990723914894, "compression_loss": 0.0, "distillation_loss": 0.12737919390201569, "epoch": 7.83, "learning_rate": 4.829319014761894e-05, "loss": 0.1222, "step": 8248, "task_loss": 0.07570772618055344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999088025009, "compression_loss": 0.0, "distillation_loss": 0.07020660489797592, "epoch": 7.83, "learning_rate": 4.828931849529129e-05, "loss": 0.0701, "step": 8249, "task_loss": 0.06908702105283737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991034818805, "compression_loss": 0.0, "distillation_loss": 0.10183189064264297, "epoch": 7.83, "learning_rate": 4.8285442612346774e-05, "loss": 0.0971, "step": 8250, "task_loss": 0.05413543060421944 }, { "epoch": 7.83, "eval_accuracy": 0.8899082568807339, "eval_loss": 0.4171345829963684, "eval_runtime": 18.0817, "eval_samples_per_second": 48.225, "eval_steps_per_second": 6.028, "step": 8250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991187631076, "compression_loss": 0.0, "distillation_loss": 0.07504773885011673, "epoch": 7.84, "learning_rate": 4.828156249948946e-05, "loss": 0.0727, "step": 8251, "task_loss": 0.051514700055122375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991338696941, "compression_loss": 0.0, "distillation_loss": 0.022689666599035263, "epoch": 7.84, "learning_rate": 4.827767815742419e-05, "loss": 0.031, "step": 8252, "task_loss": 0.10627881437540054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991488026436, "compression_loss": 0.0, "distillation_loss": 0.045913174748420715, "epoch": 7.84, "learning_rate": 4.8273789586856574e-05, "loss": 0.0693, "step": 8253, "task_loss": 0.27937787771224976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991635629597, "compression_loss": 0.0, "distillation_loss": 0.08047342300415039, "epoch": 7.84, "learning_rate": 4.8269896788493e-05, "loss": 0.0857, "step": 8254, "task_loss": 0.1325674057006836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991781516462, "compression_loss": 0.0, "distillation_loss": 0.02534073404967785, "epoch": 7.84, "learning_rate": 4.8265999763040603e-05, "loss": 0.0333, "step": 8255, "task_loss": 0.10540292412042618 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999991925697068, "compression_loss": 0.0, "distillation_loss": 0.08169533312320709, "epoch": 7.84, "learning_rate": 4.8262098511207295e-05, "loss": 0.0787, "step": 8256, "task_loss": 0.0512889064848423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999992068181451, "compression_loss": 0.0, "distillation_loss": 0.14142774045467377, "epoch": 7.84, "learning_rate": 4.825819303370177e-05, "loss": 0.1345, "step": 8257, "task_loss": 0.07237007468938828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999992208979647, "compression_loss": 0.0, "distillation_loss": 0.026454931125044823, "epoch": 7.84, "learning_rate": 4.8254283331233464e-05, "loss": 0.0294, "step": 8258, "task_loss": 0.05604104697704315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999992348101694, "compression_loss": 0.0, "distillation_loss": 0.022744864225387573, "epoch": 7.84, "learning_rate": 4.825036940451259e-05, "loss": 0.0312, "step": 8259, "task_loss": 0.10733005404472351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999248555763, "compression_loss": 0.0, "distillation_loss": 0.19848661124706268, "epoch": 7.84, "learning_rate": 4.8246451254250145e-05, "loss": 0.1952, "step": 8260, "task_loss": 0.1654394567012787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999262135749, "compression_loss": 0.0, "distillation_loss": 0.038717396557331085, "epoch": 7.85, "learning_rate": 4.8242528881157866e-05, "loss": 0.0535, "step": 8261, "task_loss": 0.18647165596485138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999992755511312, "compression_loss": 0.0, "distillation_loss": 0.07267004996538162, "epoch": 7.85, "learning_rate": 4.823860228594829e-05, "loss": 0.0775, "step": 8262, "task_loss": 0.1208515465259552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999992888029132, "compression_loss": 0.0, "distillation_loss": 0.07859297096729279, "epoch": 7.85, "learning_rate": 4.823467146933468e-05, "loss": 0.0764, "step": 8263, "task_loss": 0.05707138776779175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999993018920986, "compression_loss": 0.0, "distillation_loss": 0.015503250993788242, "epoch": 7.85, "learning_rate": 4.823073643203111e-05, "loss": 0.0248, "step": 8264, "task_loss": 0.10833414644002914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999993148196912, "compression_loss": 0.0, "distillation_loss": 0.17332197725772858, "epoch": 7.85, "learning_rate": 4.822679717475237e-05, "loss": 0.1677, "step": 8265, "task_loss": 0.11748655885457993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999993275866947, "compression_loss": 0.0, "distillation_loss": 0.031857073307037354, "epoch": 7.85, "learning_rate": 4.8222853698214076e-05, "loss": 0.0292, "step": 8266, "task_loss": 0.005062129348516464 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999993401941128, "compression_loss": 0.0, "distillation_loss": 0.02804490551352501, "epoch": 7.85, "learning_rate": 4.8218906003132555e-05, "loss": 0.0305, "step": 8267, "task_loss": 0.05293525010347366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999993526429491, "compression_loss": 0.0, "distillation_loss": 0.024678591638803482, "epoch": 7.85, "learning_rate": 4.8214954090224946e-05, "loss": 0.0295, "step": 8268, "task_loss": 0.07294875383377075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999993649342072, "compression_loss": 0.0, "distillation_loss": 0.08018738776445389, "epoch": 7.85, "learning_rate": 4.8210997960209114e-05, "loss": 0.0856, "step": 8269, "task_loss": 0.13434135913848877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999377068891, "compression_loss": 0.0, "distillation_loss": 0.021488351747393608, "epoch": 7.85, "learning_rate": 4.8207037613803715e-05, "loss": 0.0198, "step": 8270, "task_loss": 0.00473182275891304 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999389048004, "compression_loss": 0.0, "distillation_loss": 0.1669892966747284, "epoch": 7.85, "learning_rate": 4.820307305172818e-05, "loss": 0.1599, "step": 8271, "task_loss": 0.09594349563121796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994008725501, "compression_loss": 0.0, "distillation_loss": 0.039625514298677444, "epoch": 7.86, "learning_rate": 4.8199104274702666e-05, "loss": 0.054, "step": 8272, "task_loss": 0.18374371528625488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994125435328, "compression_loss": 0.0, "distillation_loss": 0.01832488365471363, "epoch": 7.86, "learning_rate": 4.819513128344814e-05, "loss": 0.0221, "step": 8273, "task_loss": 0.055578745901584625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994240619557, "compression_loss": 0.0, "distillation_loss": 0.1487281322479248, "epoch": 7.86, "learning_rate": 4.8191154078686306e-05, "loss": 0.1573, "step": 8274, "task_loss": 0.23400062322616577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994354288227, "compression_loss": 0.0, "distillation_loss": 0.101641446352005, "epoch": 7.86, "learning_rate": 4.8187172661139636e-05, "loss": 0.1103, "step": 8275, "task_loss": 0.18786770105361938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994466451373, "compression_loss": 0.0, "distillation_loss": 0.019857624545693398, "epoch": 7.86, "learning_rate": 4.818318703153139e-05, "loss": 0.0187, "step": 8276, "task_loss": 0.00832618772983551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994577119034, "compression_loss": 0.0, "distillation_loss": 0.034556448459625244, "epoch": 7.86, "learning_rate": 4.817919719058557e-05, "loss": 0.0398, "step": 8277, "task_loss": 0.08679534494876862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994686301245, "compression_loss": 0.0, "distillation_loss": 0.08879555761814117, "epoch": 7.86, "learning_rate": 4.8175203139026934e-05, "loss": 0.0846, "step": 8278, "task_loss": 0.046484023332595825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994794008043, "compression_loss": 0.0, "distillation_loss": 0.035275399684906006, "epoch": 7.86, "learning_rate": 4.817120487758104e-05, "loss": 0.0353, "step": 8279, "task_loss": 0.03573717176914215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999994900249465, "compression_loss": 0.0, "distillation_loss": 0.05630365014076233, "epoch": 7.86, "learning_rate": 4.81672024069742e-05, "loss": 0.0763, "step": 8280, "task_loss": 0.2562328577041626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999500503555, "compression_loss": 0.0, "distillation_loss": 0.03214915469288826, "epoch": 7.86, "learning_rate": 4.816319572793345e-05, "loss": 0.0307, "step": 8281, "task_loss": 0.01762857846915722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999510837633, "compression_loss": 0.0, "distillation_loss": 0.12708397209644318, "epoch": 7.87, "learning_rate": 4.815918484118665e-05, "loss": 0.1273, "step": 8282, "task_loss": 0.1288326233625412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995210281846, "compression_loss": 0.0, "distillation_loss": 0.03236864507198334, "epoch": 7.87, "learning_rate": 4.815516974746239e-05, "loss": 0.0354, "step": 8283, "task_loss": 0.06306421756744385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995310762132, "compression_loss": 0.0, "distillation_loss": 0.20854994654655457, "epoch": 7.87, "learning_rate": 4.815115044749003e-05, "loss": 0.2032, "step": 8284, "task_loss": 0.15457791090011597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995409827229, "compression_loss": 0.0, "distillation_loss": 0.10258646309375763, "epoch": 7.87, "learning_rate": 4.814712694199969e-05, "loss": 0.1004, "step": 8285, "task_loss": 0.08033056557178497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995507487169, "compression_loss": 0.0, "distillation_loss": 0.009589407593011856, "epoch": 7.87, "learning_rate": 4.814309923172227e-05, "loss": 0.0089, "step": 8286, "task_loss": 0.003172697499394417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995603751991, "compression_loss": 0.0, "distillation_loss": 0.07792666554450989, "epoch": 7.87, "learning_rate": 4.81390673173894e-05, "loss": 0.0795, "step": 8287, "task_loss": 0.09386920928955078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995698631732, "compression_loss": 0.0, "distillation_loss": 0.19815564155578613, "epoch": 7.87, "learning_rate": 4.8135031199733524e-05, "loss": 0.2015, "step": 8288, "task_loss": 0.2318621575832367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995792136428, "compression_loss": 0.0, "distillation_loss": 0.06799386441707611, "epoch": 7.87, "learning_rate": 4.813099087948781e-05, "loss": 0.0688, "step": 8289, "task_loss": 0.07573655992746353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995884276118, "compression_loss": 0.0, "distillation_loss": 0.021318640559911728, "epoch": 7.87, "learning_rate": 4.812694635738621e-05, "loss": 0.0198, "step": 8290, "task_loss": 0.0060688890516757965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999995975060836, "compression_loss": 0.0, "distillation_loss": 0.06302770227193832, "epoch": 7.87, "learning_rate": 4.812289763416341e-05, "loss": 0.0652, "step": 8291, "task_loss": 0.08448217064142227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999606450062, "compression_loss": 0.0, "distillation_loss": 0.07862488925457001, "epoch": 7.87, "learning_rate": 4.81188447105549e-05, "loss": 0.0874, "step": 8292, "task_loss": 0.16682052612304688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996152605506, "compression_loss": 0.0, "distillation_loss": 0.024814866483211517, "epoch": 7.88, "learning_rate": 4.811478758729691e-05, "loss": 0.0228, "step": 8293, "task_loss": 0.004621252417564392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996239385534, "compression_loss": 0.0, "distillation_loss": 0.04633516073226929, "epoch": 7.88, "learning_rate": 4.811072626512642e-05, "loss": 0.0477, "step": 8294, "task_loss": 0.06027062237262726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996324850737, "compression_loss": 0.0, "distillation_loss": 0.12473642826080322, "epoch": 7.88, "learning_rate": 4.810666074478121e-05, "loss": 0.123, "step": 8295, "task_loss": 0.1078307181596756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996409011153, "compression_loss": 0.0, "distillation_loss": 0.0652468129992485, "epoch": 7.88, "learning_rate": 4.8102591026999796e-05, "loss": 0.0725, "step": 8296, "task_loss": 0.1382291316986084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999649187682, "compression_loss": 0.0, "distillation_loss": 0.011844221502542496, "epoch": 7.88, "learning_rate": 4.8098517112521456e-05, "loss": 0.025, "step": 8297, "task_loss": 0.1432102769613266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996573457773, "compression_loss": 0.0, "distillation_loss": 0.11007647216320038, "epoch": 7.88, "learning_rate": 4.8094439002086234e-05, "loss": 0.1154, "step": 8298, "task_loss": 0.16329604387283325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999665376405, "compression_loss": 0.0, "distillation_loss": 0.03129512071609497, "epoch": 7.88, "learning_rate": 4.809035669643495e-05, "loss": 0.0287, "step": 8299, "task_loss": 0.004948470741510391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996732805688, "compression_loss": 0.0, "distillation_loss": 0.03155820071697235, "epoch": 7.88, "learning_rate": 4.808627019630917e-05, "loss": 0.0389, "step": 8300, "task_loss": 0.10458790510892868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996810592722, "compression_loss": 0.0, "distillation_loss": 0.018336599692702293, "epoch": 7.88, "learning_rate": 4.808217950245122e-05, "loss": 0.0173, "step": 8301, "task_loss": 0.008344225585460663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996887135191, "compression_loss": 0.0, "distillation_loss": 0.1453208029270172, "epoch": 7.88, "learning_rate": 4.807808461560419e-05, "loss": 0.153, "step": 8302, "task_loss": 0.22258791327476501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999996962443132, "compression_loss": 0.0, "distillation_loss": 0.026547754183411598, "epoch": 7.89, "learning_rate": 4.8073985536511956e-05, "loss": 0.0299, "step": 8303, "task_loss": 0.059667013585567474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997036526579, "compression_loss": 0.0, "distillation_loss": 0.054472848773002625, "epoch": 7.89, "learning_rate": 4.806988226591912e-05, "loss": 0.0525, "step": 8304, "task_loss": 0.03490680456161499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997109395572, "compression_loss": 0.0, "distillation_loss": 0.030267203226685524, "epoch": 7.89, "learning_rate": 4.806577480457106e-05, "loss": 0.0307, "step": 8305, "task_loss": 0.034910961985588074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997181060147, "compression_loss": 0.0, "distillation_loss": 0.08685198426246643, "epoch": 7.89, "learning_rate": 4.8061663153213935e-05, "loss": 0.0862, "step": 8306, "task_loss": 0.07995598018169403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999725153034, "compression_loss": 0.0, "distillation_loss": 0.05820311978459358, "epoch": 7.89, "learning_rate": 4.805754731259462e-05, "loss": 0.0552, "step": 8307, "task_loss": 0.02777511440217495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997320816187, "compression_loss": 0.0, "distillation_loss": 0.0553501695394516, "epoch": 7.89, "learning_rate": 4.805342728346079e-05, "loss": 0.0533, "step": 8308, "task_loss": 0.034611959010362625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997388927726, "compression_loss": 0.0, "distillation_loss": 0.06473010778427124, "epoch": 7.89, "learning_rate": 4.804930306656087e-05, "loss": 0.0656, "step": 8309, "task_loss": 0.07328800857067108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997455874995, "compression_loss": 0.0, "distillation_loss": 0.052684955298900604, "epoch": 7.89, "learning_rate": 4.804517466264405e-05, "loss": 0.055, "step": 8310, "task_loss": 0.07596210390329361 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997521668029, "compression_loss": 0.0, "distillation_loss": 0.1126926839351654, "epoch": 7.89, "learning_rate": 4.8041042072460244e-05, "loss": 0.1187, "step": 8311, "task_loss": 0.17313697934150696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997586316866, "compression_loss": 0.0, "distillation_loss": 0.2039157748222351, "epoch": 7.89, "learning_rate": 4.803690529676019e-05, "loss": 0.2047, "step": 8312, "task_loss": 0.211942657828331 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997649831541, "compression_loss": 0.0, "distillation_loss": 0.04965321347117424, "epoch": 7.89, "learning_rate": 4.803276433629534e-05, "loss": 0.0641, "step": 8313, "task_loss": 0.19365498423576355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997712222093, "compression_loss": 0.0, "distillation_loss": 0.028864435851573944, "epoch": 7.9, "learning_rate": 4.802861919181793e-05, "loss": 0.0268, "step": 8314, "task_loss": 0.007797591388225555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997773498557, "compression_loss": 0.0, "distillation_loss": 0.06014259159564972, "epoch": 7.9, "learning_rate": 4.802446986408093e-05, "loss": 0.0624, "step": 8315, "task_loss": 0.0831136405467987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997833670972, "compression_loss": 0.0, "distillation_loss": 0.03688354790210724, "epoch": 7.9, "learning_rate": 4.8020316353838095e-05, "loss": 0.0529, "step": 8316, "task_loss": 0.19723045825958252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997892749372, "compression_loss": 0.0, "distillation_loss": 0.029626084491610527, "epoch": 7.9, "learning_rate": 4.8016158661843926e-05, "loss": 0.0273, "step": 8317, "task_loss": 0.006546778604388237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999997950743797, "compression_loss": 0.0, "distillation_loss": 0.02643163688480854, "epoch": 7.9, "learning_rate": 4.8011996788853686e-05, "loss": 0.0339, "step": 8318, "task_loss": 0.10074707865715027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998007664281, "compression_loss": 0.0, "distillation_loss": 0.021654170006513596, "epoch": 7.9, "learning_rate": 4.80078307356234e-05, "loss": 0.0306, "step": 8319, "task_loss": 0.1115964949131012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998063520862, "compression_loss": 0.0, "distillation_loss": 0.020197220146656036, "epoch": 7.9, "learning_rate": 4.800366050290986e-05, "loss": 0.0187, "step": 8320, "task_loss": 0.005556017160415649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998118323577, "compression_loss": 0.0, "distillation_loss": 0.015532903373241425, "epoch": 7.9, "learning_rate": 4.799948609147061e-05, "loss": 0.0234, "step": 8321, "task_loss": 0.09371798485517502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998172082463, "compression_loss": 0.0, "distillation_loss": 0.041199732571840286, "epoch": 7.9, "learning_rate": 4.7995307502063936e-05, "loss": 0.0383, "step": 8322, "task_loss": 0.011793764308094978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998224807556, "compression_loss": 0.0, "distillation_loss": 0.11037556827068329, "epoch": 7.9, "learning_rate": 4.799112473544891e-05, "loss": 0.1096, "step": 8323, "task_loss": 0.1021459624171257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998276508894, "compression_loss": 0.0, "distillation_loss": 0.05237120762467384, "epoch": 7.91, "learning_rate": 4.7986937792385344e-05, "loss": 0.0535, "step": 8324, "task_loss": 0.0640583410859108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998327196512, "compression_loss": 0.0, "distillation_loss": 0.0475940927863121, "epoch": 7.91, "learning_rate": 4.798274667363383e-05, "loss": 0.0521, "step": 8325, "task_loss": 0.09266771376132965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998376880448, "compression_loss": 0.0, "distillation_loss": 0.022809145972132683, "epoch": 7.91, "learning_rate": 4.7978551379955684e-05, "loss": 0.0212, "step": 8326, "task_loss": 0.006463898345828056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999842557074, "compression_loss": 0.0, "distillation_loss": 0.11664307862520218, "epoch": 7.91, "learning_rate": 4.797435191211302e-05, "loss": 0.1186, "step": 8327, "task_loss": 0.13656139373779297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998473277422, "compression_loss": 0.0, "distillation_loss": 0.047932952642440796, "epoch": 7.91, "learning_rate": 4.797014827086869e-05, "loss": 0.0526, "step": 8328, "task_loss": 0.09455284476280212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998520010533, "compression_loss": 0.0, "distillation_loss": 0.0719044953584671, "epoch": 7.91, "learning_rate": 4.79659404569863e-05, "loss": 0.0775, "step": 8329, "task_loss": 0.1278771162033081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999856578011, "compression_loss": 0.0, "distillation_loss": 0.010797183960676193, "epoch": 7.91, "learning_rate": 4.7961728471230214e-05, "loss": 0.0099, "step": 8330, "task_loss": 0.0018469560891389847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998610596187, "compression_loss": 0.0, "distillation_loss": 0.0197126641869545, "epoch": 7.91, "learning_rate": 4.7957512314365574e-05, "loss": 0.0256, "step": 8331, "task_loss": 0.07906901836395264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998654468805, "compression_loss": 0.0, "distillation_loss": 0.02190200798213482, "epoch": 7.91, "learning_rate": 4.7953291987158254e-05, "loss": 0.0201, "step": 8332, "task_loss": 0.003983369097113609 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998697407997, "compression_loss": 0.0, "distillation_loss": 0.15597021579742432, "epoch": 7.91, "learning_rate": 4.79490674903749e-05, "loss": 0.1605, "step": 8333, "task_loss": 0.20110704004764557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998739423803, "compression_loss": 0.0, "distillation_loss": 0.020552337169647217, "epoch": 7.91, "learning_rate": 4.7944838824782916e-05, "loss": 0.0292, "step": 8334, "task_loss": 0.10749036073684692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998780526257, "compression_loss": 0.0, "distillation_loss": 0.02331959456205368, "epoch": 7.92, "learning_rate": 4.794060599115045e-05, "loss": 0.0217, "step": 8335, "task_loss": 0.0067675188183784485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998820725398, "compression_loss": 0.0, "distillation_loss": 0.012297404929995537, "epoch": 7.92, "learning_rate": 4.793636899024643e-05, "loss": 0.019, "step": 8336, "task_loss": 0.07974053174257278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998860031262, "compression_loss": 0.0, "distillation_loss": 0.17458131909370422, "epoch": 7.92, "learning_rate": 4.7932127822840516e-05, "loss": 0.1665, "step": 8337, "task_loss": 0.09329235553741455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998898453885, "compression_loss": 0.0, "distillation_loss": 0.016357656568288803, "epoch": 7.92, "learning_rate": 4.792788248970314e-05, "loss": 0.0217, "step": 8338, "task_loss": 0.06964881718158722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998936003305, "compression_loss": 0.0, "distillation_loss": 0.10674792528152466, "epoch": 7.92, "learning_rate": 4.79236329916055e-05, "loss": 0.1097, "step": 8339, "task_loss": 0.13648146390914917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999998972689558, "compression_loss": 0.0, "distillation_loss": 0.014713255688548088, "epoch": 7.92, "learning_rate": 4.79193793293195e-05, "loss": 0.0227, "step": 8340, "task_loss": 0.09422945976257324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999008522681, "compression_loss": 0.0, "distillation_loss": 0.014077425003051758, "epoch": 7.92, "learning_rate": 4.791512150361788e-05, "loss": 0.0292, "step": 8341, "task_loss": 0.1649925708770752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999043512712, "compression_loss": 0.0, "distillation_loss": 0.08245790749788284, "epoch": 7.92, "learning_rate": 4.791085951527408e-05, "loss": 0.0749, "step": 8342, "task_loss": 0.006448717787861824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999077669687, "compression_loss": 0.0, "distillation_loss": 0.08674632757902145, "epoch": 7.92, "learning_rate": 4.7906593365062304e-05, "loss": 0.0895, "step": 8343, "task_loss": 0.11430380493402481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999111003642, "compression_loss": 0.0, "distillation_loss": 0.06291348487138748, "epoch": 7.92, "learning_rate": 4.790232305375752e-05, "loss": 0.061, "step": 8344, "task_loss": 0.04396482929587364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999143524614, "compression_loss": 0.0, "distillation_loss": 0.07400640845298767, "epoch": 7.92, "learning_rate": 4.789804858213547e-05, "loss": 0.0753, "step": 8345, "task_loss": 0.08731956779956818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999175242641, "compression_loss": 0.0, "distillation_loss": 0.014547398313879967, "epoch": 7.93, "learning_rate": 4.7893769950972605e-05, "loss": 0.0196, "step": 8346, "task_loss": 0.06489875912666321 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999206167759, "compression_loss": 0.0, "distillation_loss": 0.02471366710960865, "epoch": 7.93, "learning_rate": 4.788948716104618e-05, "loss": 0.0266, "step": 8347, "task_loss": 0.043872442096471786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999236310006, "compression_loss": 0.0, "distillation_loss": 0.032343026250600815, "epoch": 7.93, "learning_rate": 4.7885200213134164e-05, "loss": 0.0309, "step": 8348, "task_loss": 0.01805000938475132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999265679416, "compression_loss": 0.0, "distillation_loss": 0.16060638427734375, "epoch": 7.93, "learning_rate": 4.788090910801532e-05, "loss": 0.1635, "step": 8349, "task_loss": 0.18905338644981384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999294286029, "compression_loss": 0.0, "distillation_loss": 0.021082771942019463, "epoch": 7.93, "learning_rate": 4.787661384646913e-05, "loss": 0.0198, "step": 8350, "task_loss": 0.008079813793301582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999932213988, "compression_loss": 0.0, "distillation_loss": 0.09513162821531296, "epoch": 7.93, "learning_rate": 4.787231442927587e-05, "loss": 0.1033, "step": 8351, "task_loss": 0.17651014029979706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999349251006, "compression_loss": 0.0, "distillation_loss": 0.026541750878095627, "epoch": 7.93, "learning_rate": 4.786801085721654e-05, "loss": 0.0317, "step": 8352, "task_loss": 0.07830867916345596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999375629444, "compression_loss": 0.0, "distillation_loss": 0.052206460386514664, "epoch": 7.93, "learning_rate": 4.78637031310729e-05, "loss": 0.0684, "step": 8353, "task_loss": 0.21389958262443542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999401285232, "compression_loss": 0.0, "distillation_loss": 0.020280838012695312, "epoch": 7.93, "learning_rate": 4.7859391251627474e-05, "loss": 0.0348, "step": 8354, "task_loss": 0.16505402326583862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999426228404, "compression_loss": 0.0, "distillation_loss": 0.04192467778921127, "epoch": 7.93, "learning_rate": 4.7855075219663535e-05, "loss": 0.0557, "step": 8355, "task_loss": 0.17976054549217224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999450469, "compression_loss": 0.0, "distillation_loss": 0.029960088431835175, "epoch": 7.94, "learning_rate": 4.785075503596511e-05, "loss": 0.0296, "step": 8356, "task_loss": 0.02586023323237896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999474017054, "compression_loss": 0.0, "distillation_loss": 0.041166506707668304, "epoch": 7.94, "learning_rate": 4.7846430701316994e-05, "loss": 0.0383, "step": 8357, "task_loss": 0.012875651940703392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999496882605, "compression_loss": 0.0, "distillation_loss": 0.07181466370820999, "epoch": 7.94, "learning_rate": 4.78421022165047e-05, "loss": 0.0763, "step": 8358, "task_loss": 0.11669700592756271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999519075689, "compression_loss": 0.0, "distillation_loss": 0.1549142599105835, "epoch": 7.94, "learning_rate": 4.783776958231453e-05, "loss": 0.1549, "step": 8359, "task_loss": 0.1552504003047943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999540606343, "compression_loss": 0.0, "distillation_loss": 0.0709429532289505, "epoch": 7.94, "learning_rate": 4.783343279953353e-05, "loss": 0.0802, "step": 8360, "task_loss": 0.16332530975341797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999561484603, "compression_loss": 0.0, "distillation_loss": 0.05141635984182358, "epoch": 7.94, "learning_rate": 4.782909186894949e-05, "loss": 0.048, "step": 8361, "task_loss": 0.017139893025159836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999581720506, "compression_loss": 0.0, "distillation_loss": 0.08039192855358124, "epoch": 7.94, "learning_rate": 4.782474679135097e-05, "loss": 0.0835, "step": 8362, "task_loss": 0.11189291626214981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999960132409, "compression_loss": 0.0, "distillation_loss": 0.03790656104683876, "epoch": 7.94, "learning_rate": 4.782039756752727e-05, "loss": 0.0349, "step": 8363, "task_loss": 0.008079200983047485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999620305391, "compression_loss": 0.0, "distillation_loss": 0.03653489425778389, "epoch": 7.94, "learning_rate": 4.781604419826845e-05, "loss": 0.043, "step": 8364, "task_loss": 0.10079368948936462 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999638674447, "compression_loss": 0.0, "distillation_loss": 0.06203051656484604, "epoch": 7.94, "learning_rate": 4.781168668436532e-05, "loss": 0.0605, "step": 8365, "task_loss": 0.04713946580886841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999656441292, "compression_loss": 0.0, "distillation_loss": 0.1302792727947235, "epoch": 7.94, "learning_rate": 4.780732502660943e-05, "loss": 0.1366, "step": 8366, "task_loss": 0.1930633783340454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999673615966, "compression_loss": 0.0, "distillation_loss": 0.1713554859161377, "epoch": 7.95, "learning_rate": 4.780295922579312e-05, "loss": 0.1854, "step": 8367, "task_loss": 0.3117556571960449 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999690208504, "compression_loss": 0.0, "distillation_loss": 0.08704891055822372, "epoch": 7.95, "learning_rate": 4.779858928270944e-05, "loss": 0.0917, "step": 8368, "task_loss": 0.13358929753303528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999706228943, "compression_loss": 0.0, "distillation_loss": 0.12303698807954788, "epoch": 7.95, "learning_rate": 4.7794215198152216e-05, "loss": 0.1175, "step": 8369, "task_loss": 0.06791896373033524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999972168732, "compression_loss": 0.0, "distillation_loss": 0.09498666226863861, "epoch": 7.95, "learning_rate": 4.778983697291603e-05, "loss": 0.091, "step": 8370, "task_loss": 0.05467312037944794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999736593671, "compression_loss": 0.0, "distillation_loss": 0.0775984525680542, "epoch": 7.95, "learning_rate": 4.7785454607796195e-05, "loss": 0.0746, "step": 8371, "task_loss": 0.04722478240728378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999750958036, "compression_loss": 0.0, "distillation_loss": 0.09941750019788742, "epoch": 7.95, "learning_rate": 4.77810681035888e-05, "loss": 0.0948, "step": 8372, "task_loss": 0.053179264068603516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999764790447, "compression_loss": 0.0, "distillation_loss": 0.08102773874998093, "epoch": 7.95, "learning_rate": 4.777667746109067e-05, "loss": 0.0827, "step": 8373, "task_loss": 0.09824100136756897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999778100945, "compression_loss": 0.0, "distillation_loss": 0.06384574621915817, "epoch": 7.95, "learning_rate": 4.7772282681099377e-05, "loss": 0.0756, "step": 8374, "task_loss": 0.18099090456962585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999790899565, "compression_loss": 0.0, "distillation_loss": 0.024305138736963272, "epoch": 7.95, "learning_rate": 4.7767883764413266e-05, "loss": 0.0307, "step": 8375, "task_loss": 0.08778215944766998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999803196343, "compression_loss": 0.0, "distillation_loss": 0.029779810458421707, "epoch": 7.95, "learning_rate": 4.776348071183142e-05, "loss": 0.0478, "step": 8376, "task_loss": 0.20959466695785522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999815001317, "compression_loss": 0.0, "distillation_loss": 0.06962715834379196, "epoch": 7.96, "learning_rate": 4.775907352415367e-05, "loss": 0.0651, "step": 8377, "task_loss": 0.0247165709733963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999826324524, "compression_loss": 0.0, "distillation_loss": 0.14432743191719055, "epoch": 7.96, "learning_rate": 4.7754662202180606e-05, "loss": 0.1392, "step": 8378, "task_loss": 0.0935506820678711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999837176001, "compression_loss": 0.0, "distillation_loss": 0.019188987091183662, "epoch": 7.96, "learning_rate": 4.7750246746713565e-05, "loss": 0.0178, "step": 8379, "task_loss": 0.005380744114518166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999847565783, "compression_loss": 0.0, "distillation_loss": 0.04302148520946503, "epoch": 7.96, "learning_rate": 4.7745827158554634e-05, "loss": 0.071, "step": 8380, "task_loss": 0.3228791356086731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999857503908, "compression_loss": 0.0, "distillation_loss": 0.033299144357442856, "epoch": 7.96, "learning_rate": 4.774140343850666e-05, "loss": 0.0338, "step": 8381, "task_loss": 0.03854818642139435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999867000414, "compression_loss": 0.0, "distillation_loss": 0.16388925909996033, "epoch": 7.96, "learning_rate": 4.773697558737322e-05, "loss": 0.1616, "step": 8382, "task_loss": 0.14093096554279327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999876065336, "compression_loss": 0.0, "distillation_loss": 0.11715799570083618, "epoch": 7.96, "learning_rate": 4.773254360595867e-05, "loss": 0.1141, "step": 8383, "task_loss": 0.08705883473157883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999884708712, "compression_loss": 0.0, "distillation_loss": 0.07380795478820801, "epoch": 7.96, "learning_rate": 4.77281074950681e-05, "loss": 0.0678, "step": 8384, "task_loss": 0.013948189094662666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999892940577, "compression_loss": 0.0, "distillation_loss": 0.15698346495628357, "epoch": 7.96, "learning_rate": 4.7723667255507334e-05, "loss": 0.1518, "step": 8385, "task_loss": 0.10555059462785721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999990077097, "compression_loss": 0.0, "distillation_loss": 0.02515373006463051, "epoch": 7.96, "learning_rate": 4.771922288808297e-05, "loss": 0.0234, "step": 8386, "task_loss": 0.007526658475399017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999908209927, "compression_loss": 0.0, "distillation_loss": 0.015803663060069084, "epoch": 7.96, "learning_rate": 4.771477439360235e-05, "loss": 0.0145, "step": 8387, "task_loss": 0.002915032207965851 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999915267485, "compression_loss": 0.0, "distillation_loss": 0.12163722515106201, "epoch": 7.97, "learning_rate": 4.7710321772873566e-05, "loss": 0.1286, "step": 8388, "task_loss": 0.19121067225933075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999921953681, "compression_loss": 0.0, "distillation_loss": 0.04631570726633072, "epoch": 7.97, "learning_rate": 4.770586502670546e-05, "loss": 0.054, "step": 8389, "task_loss": 0.12329075485467911 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999992827855, "compression_loss": 0.0, "distillation_loss": 0.15134918689727783, "epoch": 7.97, "learning_rate": 4.770140415590762e-05, "loss": 0.1518, "step": 8390, "task_loss": 0.15575477480888367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999934252132, "compression_loss": 0.0, "distillation_loss": 0.07986609637737274, "epoch": 7.97, "learning_rate": 4.769693916129039e-05, "loss": 0.0843, "step": 8391, "task_loss": 0.12397737801074982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999939884461, "compression_loss": 0.0, "distillation_loss": 0.09135753661394119, "epoch": 7.97, "learning_rate": 4.769247004366485e-05, "loss": 0.0998, "step": 8392, "task_loss": 0.1758865863084793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999945185575, "compression_loss": 0.0, "distillation_loss": 0.029954630881547928, "epoch": 7.97, "learning_rate": 4.768799680384283e-05, "loss": 0.0286, "step": 8393, "task_loss": 0.016476107761263847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999950165512, "compression_loss": 0.0, "distillation_loss": 0.1209501326084137, "epoch": 7.97, "learning_rate": 4.768351944263693e-05, "loss": 0.1188, "step": 8394, "task_loss": 0.09983272105455399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999954834306, "compression_loss": 0.0, "distillation_loss": 0.12946045398712158, "epoch": 7.97, "learning_rate": 4.767903796086048e-05, "loss": 0.1457, "step": 8395, "task_loss": 0.29160839319229126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999959201997, "compression_loss": 0.0, "distillation_loss": 0.03893984854221344, "epoch": 7.97, "learning_rate": 4.767455235932756e-05, "loss": 0.0484, "step": 8396, "task_loss": 0.13344644010066986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999963278618, "compression_loss": 0.0, "distillation_loss": 0.05474621057510376, "epoch": 7.97, "learning_rate": 4.7670062638853e-05, "loss": 0.0528, "step": 8397, "task_loss": 0.03571630269289017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999996707421, "compression_loss": 0.0, "distillation_loss": 0.05862941965460777, "epoch": 7.98, "learning_rate": 4.766556880025238e-05, "loss": 0.0592, "step": 8398, "task_loss": 0.06433902680873871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999970598807, "compression_loss": 0.0, "distillation_loss": 0.022588767111301422, "epoch": 7.98, "learning_rate": 4.7661070844342033e-05, "loss": 0.0212, "step": 8399, "task_loss": 0.008752534165978432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999973862446, "compression_loss": 0.0, "distillation_loss": 0.0467553474009037, "epoch": 7.98, "learning_rate": 4.7656568771939024e-05, "loss": 0.0514, "step": 8400, "task_loss": 0.09337884187698364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999976875165, "compression_loss": 0.0, "distillation_loss": 0.11045566201210022, "epoch": 7.98, "learning_rate": 4.765206258386119e-05, "loss": 0.1095, "step": 8401, "task_loss": 0.10068619251251221 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999979647, "compression_loss": 0.0, "distillation_loss": 0.022844959050416946, "epoch": 7.98, "learning_rate": 4.7647552280927086e-05, "loss": 0.0213, "step": 8402, "task_loss": 0.007824547588825226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999982187989, "compression_loss": 0.0, "distillation_loss": 0.038698434829711914, "epoch": 7.98, "learning_rate": 4.764303786395604e-05, "loss": 0.0406, "step": 8403, "task_loss": 0.05789912864565849 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999984508167, "compression_loss": 0.0, "distillation_loss": 0.043944694101810455, "epoch": 7.98, "learning_rate": 4.763851933376812e-05, "loss": 0.0417, "step": 8404, "task_loss": 0.02184183895587921 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999986617573, "compression_loss": 0.0, "distillation_loss": 0.012151426635682583, "epoch": 7.98, "learning_rate": 4.763399669118414e-05, "loss": 0.0115, "step": 8405, "task_loss": 0.005970221012830734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999988526242, "compression_loss": 0.0, "distillation_loss": 0.0856751874089241, "epoch": 7.98, "learning_rate": 4.762946993702565e-05, "loss": 0.0912, "step": 8406, "task_loss": 0.1414024531841278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999990244211, "compression_loss": 0.0, "distillation_loss": 0.059896957129240036, "epoch": 7.98, "learning_rate": 4.7624939072114954e-05, "loss": 0.0598, "step": 8407, "task_loss": 0.05849459767341614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999991781517, "compression_loss": 0.0, "distillation_loss": 0.1813950538635254, "epoch": 7.98, "learning_rate": 4.762040409727512e-05, "loss": 0.1745, "step": 8408, "task_loss": 0.11278204619884491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999993148197, "compression_loss": 0.0, "distillation_loss": 0.11629247665405273, "epoch": 7.99, "learning_rate": 4.761586501332994e-05, "loss": 0.1225, "step": 8409, "task_loss": 0.17883270978927612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999994354289, "compression_loss": 0.0, "distillation_loss": 0.019268091768026352, "epoch": 7.99, "learning_rate": 4.7611321821103954e-05, "loss": 0.0178, "step": 8410, "task_loss": 0.0050684306770563126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999995409828, "compression_loss": 0.0, "distillation_loss": 0.03275691345334053, "epoch": 7.99, "learning_rate": 4.760677452142247e-05, "loss": 0.0434, "step": 8411, "task_loss": 0.13962477445602417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999996324851, "compression_loss": 0.0, "distillation_loss": 0.0782676413655281, "epoch": 7.99, "learning_rate": 4.760222311511152e-05, "loss": 0.0755, "step": 8412, "task_loss": 0.05032962188124657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999997109396, "compression_loss": 0.0, "distillation_loss": 0.03344403952360153, "epoch": 7.99, "learning_rate": 4.759766760299788e-05, "loss": 0.0368, "step": 8413, "task_loss": 0.06691578030586243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999997773499, "compression_loss": 0.0, "distillation_loss": 0.027902770787477493, "epoch": 7.99, "learning_rate": 4.759310798590909e-05, "loss": 0.0359, "step": 8414, "task_loss": 0.10809982568025589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999998327197, "compression_loss": 0.0, "distillation_loss": 0.0703706368803978, "epoch": 7.99, "learning_rate": 4.758854426467343e-05, "loss": 0.0753, "step": 8415, "task_loss": 0.11971607804298401 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999998780527, "compression_loss": 0.0, "distillation_loss": 0.06848819553852081, "epoch": 7.99, "learning_rate": 4.758397644011992e-05, "loss": 0.0691, "step": 8416, "task_loss": 0.07492919266223907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999143526, "compression_loss": 0.0, "distillation_loss": 0.030622560530900955, "epoch": 7.99, "learning_rate": 4.757940451307831e-05, "loss": 0.0384, "step": 8417, "task_loss": 0.10844360291957855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999426228, "compression_loss": 0.0, "distillation_loss": 0.06612611562013626, "epoch": 7.99, "learning_rate": 4.757482848437914e-05, "loss": 0.0736, "step": 8418, "task_loss": 0.14050063490867615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999638675, "compression_loss": 0.0, "distillation_loss": 0.0231521874666214, "epoch": 8.0, "learning_rate": 4.7570248354853644e-05, "loss": 0.0454, "step": 8419, "task_loss": 0.2456911951303482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.79999999997909, "compression_loss": 0.0, "distillation_loss": 0.027434296905994415, "epoch": 8.0, "learning_rate": 4.7565664125333845e-05, "loss": 0.0334, "step": 8420, "task_loss": 0.08753321319818497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.799999999989294, "compression_loss": 0.0, "distillation_loss": 0.07552851736545563, "epoch": 8.0, "learning_rate": 4.7561075796652464e-05, "loss": 0.0764, "step": 8421, "task_loss": 0.08394578844308853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999954834, "compression_loss": 0.0, "distillation_loss": 0.02573383040726185, "epoch": 8.0, "learning_rate": 4.755648336964302e-05, "loss": 0.0239, "step": 8422, "task_loss": 0.0073777977377176285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999986618, "compression_loss": 0.0, "distillation_loss": 0.06676770746707916, "epoch": 8.0, "learning_rate": 4.7551886845139743e-05, "loss": 0.089, "step": 8423, "task_loss": 0.28941696882247925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6247876100877703, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.7998046816538257, "compression/magnitude_sparsity/target_sparsity_level": 0.7999999999998327, "compression_loss": 0.0, "distillation_loss": 0.007792739663273096, "epoch": 8.0, "learning_rate": 4.754728622397761e-05, "loss": 0.0073, "step": 8424, "task_loss": 0.0028934250585734844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1277932971715927, "epoch": 8.0, "learning_rate": 4.754268150699234e-05, "loss": 0.12, "step": 8425, "task_loss": 0.049523890018463135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.18691036105155945, "epoch": 8.0, "learning_rate": 4.753807269502041e-05, "loss": 0.1797, "step": 8426, "task_loss": 0.11476030945777893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11132822930812836, "epoch": 8.0, "learning_rate": 4.7533459788899026e-05, "loss": 0.1066, "step": 8427, "task_loss": 0.0645451620221138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06175380200147629, "epoch": 8.0, "learning_rate": 4.752884278946614e-05, "loss": 0.0701, "step": 8428, "task_loss": 0.14483240246772766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10747231543064117, "epoch": 8.0, "learning_rate": 4.752422169756048e-05, "loss": 0.1015, "step": 8429, "task_loss": 0.0479457788169384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15526169538497925, "epoch": 8.01, "learning_rate": 4.7519596514021464e-05, "loss": 0.1475, "step": 8430, "task_loss": 0.07779216766357422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12560060620307922, "epoch": 8.01, "learning_rate": 4.751496723968929e-05, "loss": 0.1216, "step": 8431, "task_loss": 0.08531402051448822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06614533811807632, "epoch": 8.01, "learning_rate": 4.751033387540488e-05, "loss": 0.0747, "step": 8432, "task_loss": 0.151977077126503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.19305379688739777, "epoch": 8.01, "learning_rate": 4.7505696422009904e-05, "loss": 0.1905, "step": 8433, "task_loss": 0.16793277859687805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05545946955680847, "epoch": 8.01, "learning_rate": 4.750105488034679e-05, "loss": 0.0656, "step": 8434, "task_loss": 0.15694250166416168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09607134759426117, "epoch": 8.01, "learning_rate": 4.749640925125869e-05, "loss": 0.1036, "step": 8435, "task_loss": 0.17179882526397705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036613885313272476, "epoch": 8.01, "learning_rate": 4.749175953558951e-05, "loss": 0.0342, "step": 8436, "task_loss": 0.012063302099704742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1419782042503357, "epoch": 8.01, "learning_rate": 4.748710573418388e-05, "loss": 0.1527, "step": 8437, "task_loss": 0.24885791540145874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016515720635652542, "epoch": 8.01, "learning_rate": 4.7482447847887204e-05, "loss": 0.0154, "step": 8438, "task_loss": 0.00573185458779335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017054930329322815, "epoch": 8.01, "learning_rate": 4.747778587754559e-05, "loss": 0.0218, "step": 8439, "task_loss": 0.06488415598869324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03005680814385414, "epoch": 8.02, "learning_rate": 4.7473119824005926e-05, "loss": 0.0275, "step": 8440, "task_loss": 0.004504000768065453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05955897271633148, "epoch": 8.02, "learning_rate": 4.7468449688115806e-05, "loss": 0.0621, "step": 8441, "task_loss": 0.08468262106180191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03443329036235809, "epoch": 8.02, "learning_rate": 4.74637754707236e-05, "loss": 0.0326, "step": 8442, "task_loss": 0.016446424648165703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020430468022823334, "epoch": 8.02, "learning_rate": 4.7459097172678386e-05, "loss": 0.0197, "step": 8443, "task_loss": 0.013592688366770744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12642060220241547, "epoch": 8.02, "learning_rate": 4.745441479483001e-05, "loss": 0.1201, "step": 8444, "task_loss": 0.06335929036140442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06663849949836731, "epoch": 8.02, "learning_rate": 4.744972833802904e-05, "loss": 0.0631, "step": 8445, "task_loss": 0.031001247465610504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.3165062665939331, "epoch": 8.02, "learning_rate": 4.74450378031268e-05, "loss": 0.3055, "step": 8446, "task_loss": 0.20671746134757996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07666581869125366, "epoch": 8.02, "learning_rate": 4.744034319097535e-05, "loss": 0.0726, "step": 8447, "task_loss": 0.035642966628074646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.050358712673187256, "epoch": 8.02, "learning_rate": 4.743564450242749e-05, "loss": 0.0464, "step": 8448, "task_loss": 0.010279197245836258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017902474850416183, "epoch": 8.02, "learning_rate": 4.7430941738336745e-05, "loss": 0.0164, "step": 8449, "task_loss": 0.002874387428164482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022135000675916672, "epoch": 8.02, "learning_rate": 4.742623489955741e-05, "loss": 0.0304, "step": 8450, "task_loss": 0.10519418120384216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022276829928159714, "epoch": 8.03, "learning_rate": 4.74215239869445e-05, "loss": 0.0206, "step": 8451, "task_loss": 0.005232140421867371 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07043208181858063, "epoch": 8.03, "learning_rate": 4.741680900135377e-05, "loss": 0.0705, "step": 8452, "task_loss": 0.07124508917331696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0691102147102356, "epoch": 8.03, "learning_rate": 4.741208994364173e-05, "loss": 0.0644, "step": 8453, "task_loss": 0.021717606112360954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026366369798779488, "epoch": 8.03, "learning_rate": 4.740736681466561e-05, "loss": 0.0242, "step": 8454, "task_loss": 0.00459631159901619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1180252730846405, "epoch": 8.03, "learning_rate": 4.74026396152834e-05, "loss": 0.119, "step": 8455, "task_loss": 0.1280374825000763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07965636998414993, "epoch": 8.03, "learning_rate": 4.7397908346353796e-05, "loss": 0.0733, "step": 8456, "task_loss": 0.01603274792432785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09866228699684143, "epoch": 8.03, "learning_rate": 4.739317300873628e-05, "loss": 0.0939, "step": 8457, "task_loss": 0.05102086067199707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05824364721775055, "epoch": 8.03, "learning_rate": 4.738843360329104e-05, "loss": 0.0556, "step": 8458, "task_loss": 0.03139631822705269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0658094584941864, "epoch": 8.03, "learning_rate": 4.738369013087902e-05, "loss": 0.0644, "step": 8459, "task_loss": 0.05181333050131798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022087426856160164, "epoch": 8.03, "learning_rate": 4.7378942592361876e-05, "loss": 0.0291, "step": 8460, "task_loss": 0.09194046258926392 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013795844279229641, "epoch": 8.04, "learning_rate": 4.737419098860204e-05, "loss": 0.0216, "step": 8461, "task_loss": 0.09192723780870438 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13639725744724274, "epoch": 8.04, "learning_rate": 4.7369435320462654e-05, "loss": 0.1467, "step": 8462, "task_loss": 0.23894120752811432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07075132429599762, "epoch": 8.04, "learning_rate": 4.73646755888076e-05, "loss": 0.0742, "step": 8463, "task_loss": 0.10508999973535538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031757891178131104, "epoch": 8.04, "learning_rate": 4.7359911794501526e-05, "loss": 0.0294, "step": 8464, "task_loss": 0.007956236600875854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1145702600479126, "epoch": 8.04, "learning_rate": 4.7355143938409785e-05, "loss": 0.1207, "step": 8465, "task_loss": 0.175959050655365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1442631483078003, "epoch": 8.04, "learning_rate": 4.735037202139849e-05, "loss": 0.1416, "step": 8466, "task_loss": 0.1179547980427742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05049190670251846, "epoch": 8.04, "learning_rate": 4.734559604433447e-05, "loss": 0.0476, "step": 8467, "task_loss": 0.0210769884288311 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021424848586320877, "epoch": 8.04, "learning_rate": 4.734081600808531e-05, "loss": 0.0274, "step": 8468, "task_loss": 0.08106733858585358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027790479362010956, "epoch": 8.04, "learning_rate": 4.733603191351933e-05, "loss": 0.0255, "step": 8469, "task_loss": 0.004662582650780678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021773390471935272, "epoch": 8.04, "learning_rate": 4.733124376150558e-05, "loss": 0.027, "step": 8470, "task_loss": 0.07355040311813354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0780288502573967, "epoch": 8.04, "learning_rate": 4.7326451552913856e-05, "loss": 0.0713, "step": 8471, "task_loss": 0.011100053787231445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07223173975944519, "epoch": 8.05, "learning_rate": 4.7321655288614674e-05, "loss": 0.0675, "step": 8472, "task_loss": 0.024789290502667427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020628787577152252, "epoch": 8.05, "learning_rate": 4.7316854969479314e-05, "loss": 0.0269, "step": 8473, "task_loss": 0.08376266062259674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08520027995109558, "epoch": 8.05, "learning_rate": 4.7312050596379764e-05, "loss": 0.0812, "step": 8474, "task_loss": 0.0456637404859066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021483324468135834, "epoch": 8.05, "learning_rate": 4.730724217018877e-05, "loss": 0.0244, "step": 8475, "task_loss": 0.050539180636405945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.28899598121643066, "epoch": 8.05, "learning_rate": 4.7302429691779806e-05, "loss": 0.2756, "step": 8476, "task_loss": 0.154689222574234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016797779127955437, "epoch": 8.05, "learning_rate": 4.729761316202708e-05, "loss": 0.0163, "step": 8477, "task_loss": 0.011920711025595665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02568567357957363, "epoch": 8.05, "learning_rate": 4.729279258180553e-05, "loss": 0.0242, "step": 8478, "task_loss": 0.011267339810729027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04046610742807388, "epoch": 8.05, "learning_rate": 4.7287967951990855e-05, "loss": 0.043, "step": 8479, "task_loss": 0.06550759077072144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.061091069132089615, "epoch": 8.05, "learning_rate": 4.7283139273459445e-05, "loss": 0.0739, "step": 8480, "task_loss": 0.1896328330039978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09908688813447952, "epoch": 8.05, "learning_rate": 4.727830654708848e-05, "loss": 0.0966, "step": 8481, "task_loss": 0.07403028011322021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03582286834716797, "epoch": 8.06, "learning_rate": 4.727346977375584e-05, "loss": 0.0419, "step": 8482, "task_loss": 0.09690375626087189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015889644622802734, "epoch": 8.06, "learning_rate": 4.7268628954340136e-05, "loss": 0.016, "step": 8483, "task_loss": 0.017025936394929886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013576588593423367, "epoch": 8.06, "learning_rate": 4.726378408972074e-05, "loss": 0.0387, "step": 8484, "task_loss": 0.26450613141059875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10775915533304214, "epoch": 8.06, "learning_rate": 4.725893518077774e-05, "loss": 0.1018, "step": 8485, "task_loss": 0.047988370060920715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04508550837635994, "epoch": 8.06, "learning_rate": 4.725408222839197e-05, "loss": 0.0458, "step": 8486, "task_loss": 0.05229977145791054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01839440129697323, "epoch": 8.06, "learning_rate": 4.724922523344498e-05, "loss": 0.0171, "step": 8487, "task_loss": 0.0054893046617507935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17300604283809662, "epoch": 8.06, "learning_rate": 4.724436419681907e-05, "loss": 0.165, "step": 8488, "task_loss": 0.09258658438920975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2892667055130005, "epoch": 8.06, "learning_rate": 4.723949911939728e-05, "loss": 0.2899, "step": 8489, "task_loss": 0.2951303720474243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10540339350700378, "epoch": 8.06, "learning_rate": 4.723463000206337e-05, "loss": 0.102, "step": 8490, "task_loss": 0.07126626372337341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04866549000144005, "epoch": 8.06, "learning_rate": 4.722975684570183e-05, "loss": 0.0448, "step": 8491, "task_loss": 0.009562673047184944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043563053011894226, "epoch": 8.06, "learning_rate": 4.7224879651197905e-05, "loss": 0.0401, "step": 8492, "task_loss": 0.00909213162958622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12174250930547714, "epoch": 8.07, "learning_rate": 4.721999841943755e-05, "loss": 0.1173, "step": 8493, "task_loss": 0.07682164013385773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019868122413754463, "epoch": 8.07, "learning_rate": 4.721511315130747e-05, "loss": 0.029, "step": 8494, "task_loss": 0.11105664074420929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04917510598897934, "epoch": 8.07, "learning_rate": 4.7210223847695104e-05, "loss": 0.0623, "step": 8495, "task_loss": 0.1808261275291443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2701743543148041, "epoch": 8.07, "learning_rate": 4.72053305094886e-05, "loss": 0.2656, "step": 8496, "task_loss": 0.22409585118293762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03515856713056564, "epoch": 8.07, "learning_rate": 4.720043313757687e-05, "loss": 0.0465, "step": 8497, "task_loss": 0.14875555038452148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.20637600123882294, "epoch": 8.07, "learning_rate": 4.719553173284955e-05, "loss": 0.2016, "step": 8498, "task_loss": 0.15902316570281982 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10677587240934372, "epoch": 8.07, "learning_rate": 4.719062629619699e-05, "loss": 0.1103, "step": 8499, "task_loss": 0.14183999598026276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11615284532308578, "epoch": 8.07, "learning_rate": 4.71857168285103e-05, "loss": 0.1115, "step": 8500, "task_loss": 0.07010817527770996 }, { "epoch": 8.07, "eval_accuracy": 0.8864678899082569, "eval_loss": 0.4948354661464691, "eval_runtime": 18.1296, "eval_samples_per_second": 48.098, "eval_steps_per_second": 6.012, "step": 8500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10728442668914795, "epoch": 8.07, "learning_rate": 4.718080333068129e-05, "loss": 0.1144, "step": 8501, "task_loss": 0.17879854142665863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15180592238903046, "epoch": 8.07, "learning_rate": 4.717588580360253e-05, "loss": 0.1669, "step": 8502, "task_loss": 0.30242693424224854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03224493935704231, "epoch": 8.08, "learning_rate": 4.717096424816731e-05, "loss": 0.0302, "step": 8503, "task_loss": 0.012275317683815956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.18254992365837097, "epoch": 8.08, "learning_rate": 4.716603866526967e-05, "loss": 0.1913, "step": 8504, "task_loss": 0.2695552706718445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01585567556321621, "epoch": 8.08, "learning_rate": 4.7161109055804356e-05, "loss": 0.0241, "step": 8505, "task_loss": 0.09879221767187119 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017822248861193657, "epoch": 8.08, "learning_rate": 4.7156175420666844e-05, "loss": 0.0263, "step": 8506, "task_loss": 0.10284404456615448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05827868729829788, "epoch": 8.08, "learning_rate": 4.715123776075336e-05, "loss": 0.0578, "step": 8507, "task_loss": 0.05304684489965439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08329082280397415, "epoch": 8.08, "learning_rate": 4.714629607696086e-05, "loss": 0.0791, "step": 8508, "task_loss": 0.041491199284791946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01390088815242052, "epoch": 8.08, "learning_rate": 4.714135037018702e-05, "loss": 0.0129, "step": 8509, "task_loss": 0.004111597314476967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07340320199728012, "epoch": 8.08, "learning_rate": 4.713640064133025e-05, "loss": 0.0721, "step": 8510, "task_loss": 0.06043552607297897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023112181574106216, "epoch": 8.08, "learning_rate": 4.7131446891289694e-05, "loss": 0.0217, "step": 8511, "task_loss": 0.008507607504725456 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.165248841047287, "epoch": 8.08, "learning_rate": 4.712648912096522e-05, "loss": 0.1713, "step": 8512, "task_loss": 0.2260192334651947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.18116983771324158, "epoch": 8.08, "learning_rate": 4.712152733125744e-05, "loss": 0.1778, "step": 8513, "task_loss": 0.14782485365867615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03632102161645889, "epoch": 8.09, "learning_rate": 4.711656152306768e-05, "loss": 0.0373, "step": 8514, "task_loss": 0.04582914710044861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01714969053864479, "epoch": 8.09, "learning_rate": 4.711159169729801e-05, "loss": 0.0161, "step": 8515, "task_loss": 0.006688836961984634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13296061754226685, "epoch": 8.09, "learning_rate": 4.710661785485121e-05, "loss": 0.1435, "step": 8516, "task_loss": 0.23862020671367645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017725428566336632, "epoch": 8.09, "learning_rate": 4.710163999663081e-05, "loss": 0.0163, "step": 8517, "task_loss": 0.003909563645720482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2052067518234253, "epoch": 8.09, "learning_rate": 4.709665812354107e-05, "loss": 0.1971, "step": 8518, "task_loss": 0.12428037822246552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022109489887952805, "epoch": 8.09, "learning_rate": 4.709167223648695e-05, "loss": 0.0271, "step": 8519, "task_loss": 0.07243168354034424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01610475778579712, "epoch": 8.09, "learning_rate": 4.7086682336374187e-05, "loss": 0.0271, "step": 8520, "task_loss": 0.1261139065027237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1497061550617218, "epoch": 8.09, "learning_rate": 4.70816884241092e-05, "loss": 0.1447, "step": 8521, "task_loss": 0.09966839849948883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05508185923099518, "epoch": 8.09, "learning_rate": 4.7076690500599164e-05, "loss": 0.0549, "step": 8522, "task_loss": 0.05369473248720169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022330323234200478, "epoch": 8.09, "learning_rate": 4.707168856675198e-05, "loss": 0.0208, "step": 8523, "task_loss": 0.007413491606712341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03679078072309494, "epoch": 8.09, "learning_rate": 4.7066682623476265e-05, "loss": 0.0414, "step": 8524, "task_loss": 0.08316943049430847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016818320378661156, "epoch": 8.1, "learning_rate": 4.706167267168138e-05, "loss": 0.0156, "step": 8525, "task_loss": 0.004215966910123825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026049071922898293, "epoch": 8.1, "learning_rate": 4.70566587122774e-05, "loss": 0.0302, "step": 8526, "task_loss": 0.06748848408460617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12002403289079666, "epoch": 8.1, "learning_rate": 4.7051640746175147e-05, "loss": 0.1124, "step": 8527, "task_loss": 0.04354723170399666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1447361707687378, "epoch": 8.1, "learning_rate": 4.7046618774286146e-05, "loss": 0.1348, "step": 8528, "task_loss": 0.04555728659033775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05529174208641052, "epoch": 8.1, "learning_rate": 4.7041592797522664e-05, "loss": 0.0689, "step": 8529, "task_loss": 0.19117003679275513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032082926481962204, "epoch": 8.1, "learning_rate": 4.7036562816797705e-05, "loss": 0.0319, "step": 8530, "task_loss": 0.030074482783675194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04032071679830551, "epoch": 8.1, "learning_rate": 4.7031528833024976e-05, "loss": 0.0484, "step": 8531, "task_loss": 0.12074785679578781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.19668981432914734, "epoch": 8.1, "learning_rate": 4.702649084711892e-05, "loss": 0.1871, "step": 8532, "task_loss": 0.10096022486686707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09332388639450073, "epoch": 8.1, "learning_rate": 4.7021448859994735e-05, "loss": 0.0903, "step": 8533, "task_loss": 0.06286977231502533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07967086136341095, "epoch": 8.1, "learning_rate": 4.70164028725683e-05, "loss": 0.089, "step": 8534, "task_loss": 0.17311781644821167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04477657005190849, "epoch": 8.11, "learning_rate": 4.7011352885756255e-05, "loss": 0.0428, "step": 8535, "task_loss": 0.025373056530952454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09086555242538452, "epoch": 8.11, "learning_rate": 4.7006298900475954e-05, "loss": 0.0939, "step": 8536, "task_loss": 0.12131273001432419 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021383512765169144, "epoch": 8.11, "learning_rate": 4.7001240917645465e-05, "loss": 0.0204, "step": 8537, "task_loss": 0.011899461969733238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11136257648468018, "epoch": 8.11, "learning_rate": 4.699617893818361e-05, "loss": 0.1068, "step": 8538, "task_loss": 0.06574281305074692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.052705250680446625, "epoch": 8.11, "learning_rate": 4.699111296300992e-05, "loss": 0.0526, "step": 8539, "task_loss": 0.05185743421316147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04489322006702423, "epoch": 8.11, "learning_rate": 4.6986042993044645e-05, "loss": 0.0547, "step": 8540, "task_loss": 0.14266639947891235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11219049990177155, "epoch": 8.11, "learning_rate": 4.698096902920877e-05, "loss": 0.1172, "step": 8541, "task_loss": 0.1626092791557312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04092048108577728, "epoch": 8.11, "learning_rate": 4.6975891072424015e-05, "loss": 0.041, "step": 8542, "task_loss": 0.04180833697319031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023674312978982925, "epoch": 8.11, "learning_rate": 4.697080912361281e-05, "loss": 0.0218, "step": 8543, "task_loss": 0.004959875717759132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014916968531906605, "epoch": 8.11, "learning_rate": 4.696572318369831e-05, "loss": 0.0276, "step": 8544, "task_loss": 0.14138737320899963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02858028933405876, "epoch": 8.11, "learning_rate": 4.696063325360441e-05, "loss": 0.0384, "step": 8545, "task_loss": 0.1269652247428894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1576182246208191, "epoch": 8.12, "learning_rate": 4.6955539334255716e-05, "loss": 0.1567, "step": 8546, "task_loss": 0.14879369735717773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.060055363923311234, "epoch": 8.12, "learning_rate": 4.6950441426577565e-05, "loss": 0.0705, "step": 8547, "task_loss": 0.16406583786010742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07873189449310303, "epoch": 8.12, "learning_rate": 4.694533953149601e-05, "loss": 0.0885, "step": 8548, "task_loss": 0.17685380578041077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015233471989631653, "epoch": 8.12, "learning_rate": 4.694023364993784e-05, "loss": 0.0165, "step": 8549, "task_loss": 0.027818040922284126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01854429766535759, "epoch": 8.12, "learning_rate": 4.693512378283056e-05, "loss": 0.0253, "step": 8550, "task_loss": 0.08647675812244415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05369843915104866, "epoch": 8.12, "learning_rate": 4.693000993110241e-05, "loss": 0.049, "step": 8551, "task_loss": 0.006441434845328331 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0389072448015213, "epoch": 8.12, "learning_rate": 4.692489209568234e-05, "loss": 0.0412, "step": 8552, "task_loss": 0.06141179800033569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015452216379344463, "epoch": 8.12, "learning_rate": 4.691977027750002e-05, "loss": 0.0143, "step": 8553, "task_loss": 0.004333069548010826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01049085147678852, "epoch": 8.12, "learning_rate": 4.691464447748587e-05, "loss": 0.0097, "step": 8554, "task_loss": 0.003054805099964142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08345725387334824, "epoch": 8.12, "learning_rate": 4.690951469657101e-05, "loss": 0.0808, "step": 8555, "task_loss": 0.05647343769669533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013090891763567924, "epoch": 8.13, "learning_rate": 4.690438093568728e-05, "loss": 0.0123, "step": 8556, "task_loss": 0.005195150151848793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03142661973834038, "epoch": 8.13, "learning_rate": 4.689924319576727e-05, "loss": 0.0294, "step": 8557, "task_loss": 0.010683547705411911 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12756206095218658, "epoch": 8.13, "learning_rate": 4.689410147774426e-05, "loss": 0.1162, "step": 8558, "task_loss": 0.014250561594963074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04624927043914795, "epoch": 8.13, "learning_rate": 4.6888955782552274e-05, "loss": 0.0421, "step": 8559, "task_loss": 0.00475945882499218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025834165513515472, "epoch": 8.13, "learning_rate": 4.688380611112605e-05, "loss": 0.0244, "step": 8560, "task_loss": 0.011195512488484383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02918969839811325, "epoch": 8.13, "learning_rate": 4.687865246440106e-05, "loss": 0.0374, "step": 8561, "task_loss": 0.11153009533882141 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012696018442511559, "epoch": 8.13, "learning_rate": 4.687349484331347e-05, "loss": 0.0118, "step": 8562, "task_loss": 0.0034159980714321136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01882791891694069, "epoch": 8.13, "learning_rate": 4.6868333248800204e-05, "loss": 0.0241, "step": 8563, "task_loss": 0.07115612179040909 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1464093029499054, "epoch": 8.13, "learning_rate": 4.686316768179889e-05, "loss": 0.1556, "step": 8564, "task_loss": 0.23828445374965668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0876997858285904, "epoch": 8.13, "learning_rate": 4.685799814324786e-05, "loss": 0.091, "step": 8565, "task_loss": 0.12048880755901337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2233988344669342, "epoch": 8.13, "learning_rate": 4.685282463408621e-05, "loss": 0.2209, "step": 8566, "task_loss": 0.198834627866745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02312629297375679, "epoch": 8.14, "learning_rate": 4.6847647155253716e-05, "loss": 0.0287, "step": 8567, "task_loss": 0.0787849947810173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024075020104646683, "epoch": 8.14, "learning_rate": 4.684246570769089e-05, "loss": 0.0264, "step": 8568, "task_loss": 0.047648604959249496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1928960680961609, "epoch": 8.14, "learning_rate": 4.683728029233898e-05, "loss": 0.1874, "step": 8569, "task_loss": 0.13750508427619934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09075068682432175, "epoch": 8.14, "learning_rate": 4.683209091013994e-05, "loss": 0.0963, "step": 8570, "task_loss": 0.1463644802570343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04680275171995163, "epoch": 8.14, "learning_rate": 4.682689756203643e-05, "loss": 0.0503, "step": 8571, "task_loss": 0.08158842474222183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02860000915825367, "epoch": 8.14, "learning_rate": 4.682170024897187e-05, "loss": 0.0327, "step": 8572, "task_loss": 0.06912478804588318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08807064592838287, "epoch": 8.14, "learning_rate": 4.681649897189036e-05, "loss": 0.0828, "step": 8573, "task_loss": 0.03544972091913223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03363920748233795, "epoch": 8.14, "learning_rate": 4.681129373173674e-05, "loss": 0.0422, "step": 8574, "task_loss": 0.11956753581762314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04612544924020767, "epoch": 8.14, "learning_rate": 4.6806084529456574e-05, "loss": 0.044, "step": 8575, "task_loss": 0.025233760476112366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09824679046869278, "epoch": 8.14, "learning_rate": 4.6800871365996135e-05, "loss": 0.0946, "step": 8576, "task_loss": 0.06206507235765457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08123184740543365, "epoch": 8.15, "learning_rate": 4.679565424230241e-05, "loss": 0.0874, "step": 8577, "task_loss": 0.143352210521698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03590530529618263, "epoch": 8.15, "learning_rate": 4.679043315932313e-05, "loss": 0.04, "step": 8578, "task_loss": 0.07692985236644745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026768725365400314, "epoch": 8.15, "learning_rate": 4.6785208118006715e-05, "loss": 0.0329, "step": 8579, "task_loss": 0.08837291598320007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0306844525039196, "epoch": 8.15, "learning_rate": 4.677997911930234e-05, "loss": 0.0395, "step": 8580, "task_loss": 0.11881330609321594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03350158408284187, "epoch": 8.15, "learning_rate": 4.6774746164159854e-05, "loss": 0.0419, "step": 8581, "task_loss": 0.11749888956546783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16534090042114258, "epoch": 8.15, "learning_rate": 4.676950925352986e-05, "loss": 0.1613, "step": 8582, "task_loss": 0.1252642273902893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01491392683237791, "epoch": 8.15, "learning_rate": 4.676426838836367e-05, "loss": 0.0203, "step": 8583, "task_loss": 0.06893758475780487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04224181920289993, "epoch": 8.15, "learning_rate": 4.675902356961331e-05, "loss": 0.0482, "step": 8584, "task_loss": 0.10220709443092346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024668492376804352, "epoch": 8.15, "learning_rate": 4.675377479823153e-05, "loss": 0.033, "step": 8585, "task_loss": 0.10758410394191742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04424036294221878, "epoch": 8.15, "learning_rate": 4.6748522075171784e-05, "loss": 0.0558, "step": 8586, "task_loss": 0.15995724499225616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05094079673290253, "epoch": 8.15, "learning_rate": 4.674326540138826e-05, "loss": 0.0489, "step": 8587, "task_loss": 0.030478911474347115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14906425774097443, "epoch": 8.16, "learning_rate": 4.673800477783587e-05, "loss": 0.1457, "step": 8588, "task_loss": 0.11561848223209381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051018789410591125, "epoch": 8.16, "learning_rate": 4.6732740205470206e-05, "loss": 0.0608, "step": 8589, "task_loss": 0.14838165044784546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03625892847776413, "epoch": 8.16, "learning_rate": 4.672747168524762e-05, "loss": 0.0397, "step": 8590, "task_loss": 0.07113449275493622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025677144527435303, "epoch": 8.16, "learning_rate": 4.672219921812517e-05, "loss": 0.0392, "step": 8591, "task_loss": 0.16079393029212952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08316448330879211, "epoch": 8.16, "learning_rate": 4.671692280506061e-05, "loss": 0.0914, "step": 8592, "task_loss": 0.16506913304328918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09451714903116226, "epoch": 8.16, "learning_rate": 4.671164244701243e-05, "loss": 0.0976, "step": 8593, "task_loss": 0.12491125613451004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.24787138402462006, "epoch": 8.16, "learning_rate": 4.670635814493984e-05, "loss": 0.2439, "step": 8594, "task_loss": 0.20846538245677948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0107728848233819, "epoch": 8.16, "learning_rate": 4.6701069899802755e-05, "loss": 0.01, "step": 8595, "task_loss": 0.0030124839395284653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12116492539644241, "epoch": 8.16, "learning_rate": 4.669577771256181e-05, "loss": 0.1183, "step": 8596, "task_loss": 0.09291059523820877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034399181604385376, "epoch": 8.16, "learning_rate": 4.6690481584178354e-05, "loss": 0.0317, "step": 8597, "task_loss": 0.007727684453129768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0222720205783844, "epoch": 8.17, "learning_rate": 4.6685181515614454e-05, "loss": 0.0206, "step": 8598, "task_loss": 0.0058763641864061356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07261856645345688, "epoch": 8.17, "learning_rate": 4.6679877507832895e-05, "loss": 0.0695, "step": 8599, "task_loss": 0.041326478123664856 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07069084793329239, "epoch": 8.17, "learning_rate": 4.6674569561797174e-05, "loss": 0.0644, "step": 8600, "task_loss": 0.0074882470071315765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037158627063035965, "epoch": 8.17, "learning_rate": 4.666925767847151e-05, "loss": 0.0424, "step": 8601, "task_loss": 0.08922519534826279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03928046673536301, "epoch": 8.17, "learning_rate": 4.6663941858820825e-05, "loss": 0.051, "step": 8602, "task_loss": 0.15673944354057312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.040694400668144226, "epoch": 8.17, "learning_rate": 4.665862210381077e-05, "loss": 0.0384, "step": 8603, "task_loss": 0.01785385236144066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06909865885972977, "epoch": 8.17, "learning_rate": 4.66532984144077e-05, "loss": 0.0671, "step": 8604, "task_loss": 0.04940890148282051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027597632259130478, "epoch": 8.17, "learning_rate": 4.6647970791578685e-05, "loss": 0.0295, "step": 8605, "task_loss": 0.0467480830848217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015055635944008827, "epoch": 8.17, "learning_rate": 4.664263923629153e-05, "loss": 0.014, "step": 8606, "task_loss": 0.0045957863330841064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11752206087112427, "epoch": 8.17, "learning_rate": 4.663730374951472e-05, "loss": 0.1067, "step": 8607, "task_loss": 0.009500864893198013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07718470692634583, "epoch": 8.17, "learning_rate": 4.663196433221747e-05, "loss": 0.0789, "step": 8608, "task_loss": 0.09422293305397034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027676379308104515, "epoch": 8.18, "learning_rate": 4.6626620985369724e-05, "loss": 0.0254, "step": 8609, "task_loss": 0.004925228655338287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05825607106089592, "epoch": 8.18, "learning_rate": 4.662127370994212e-05, "loss": 0.0602, "step": 8610, "task_loss": 0.07727469503879547 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029302431270480156, "epoch": 8.18, "learning_rate": 4.6615922506906016e-05, "loss": 0.0288, "step": 8611, "task_loss": 0.024653693661093712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1494738757610321, "epoch": 8.18, "learning_rate": 4.661056737723349e-05, "loss": 0.1421, "step": 8612, "task_loss": 0.07620874047279358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0703209936618805, "epoch": 8.18, "learning_rate": 4.660520832189732e-05, "loss": 0.0738, "step": 8613, "task_loss": 0.10534647107124329 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02900974452495575, "epoch": 8.18, "learning_rate": 4.6599845341871005e-05, "loss": 0.0268, "step": 8614, "task_loss": 0.007358167320489883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013132540509104729, "epoch": 8.18, "learning_rate": 4.6594478438128757e-05, "loss": 0.0122, "step": 8615, "task_loss": 0.003975642845034599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05000152438879013, "epoch": 8.18, "learning_rate": 4.6589107611645497e-05, "loss": 0.0534, "step": 8616, "task_loss": 0.08441969007253647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014517636969685555, "epoch": 8.18, "learning_rate": 4.658373286339688e-05, "loss": 0.0181, "step": 8617, "task_loss": 0.050333477556705475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11546745896339417, "epoch": 8.18, "learning_rate": 4.6578354194359227e-05, "loss": 0.1114, "step": 8618, "task_loss": 0.07436027377843857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03974412381649017, "epoch": 8.19, "learning_rate": 4.657297160550961e-05, "loss": 0.0471, "step": 8619, "task_loss": 0.11349973827600479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02339399978518486, "epoch": 8.19, "learning_rate": 4.656758509782582e-05, "loss": 0.0319, "step": 8620, "task_loss": 0.10848347842693329 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03902260586619377, "epoch": 8.19, "learning_rate": 4.6562194672286306e-05, "loss": 0.0458, "step": 8621, "task_loss": 0.10702133923768997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029453573748469353, "epoch": 8.19, "learning_rate": 4.65568003298703e-05, "loss": 0.0416, "step": 8622, "task_loss": 0.15059900283813477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0717625841498375, "epoch": 8.19, "learning_rate": 4.655140207155769e-05, "loss": 0.0746, "step": 8623, "task_loss": 0.09989339113235474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03731407970190048, "epoch": 8.19, "learning_rate": 4.65459998983291e-05, "loss": 0.0491, "step": 8624, "task_loss": 0.15505677461624146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0684528648853302, "epoch": 8.19, "learning_rate": 4.6540593811165866e-05, "loss": 0.0718, "step": 8625, "task_loss": 0.10180987417697906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.18652784824371338, "epoch": 8.19, "learning_rate": 4.653518381105002e-05, "loss": 0.1783, "step": 8626, "task_loss": 0.10443468391895294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0480349101126194, "epoch": 8.19, "learning_rate": 4.6529769898964325e-05, "loss": 0.0462, "step": 8627, "task_loss": 0.02956531010568142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09778586030006409, "epoch": 8.19, "learning_rate": 4.652435207589224e-05, "loss": 0.1007, "step": 8628, "task_loss": 0.12683331966400146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025784488767385483, "epoch": 8.19, "learning_rate": 4.651893034281793e-05, "loss": 0.0257, "step": 8629, "task_loss": 0.024955110624432564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05094904452562332, "epoch": 8.2, "learning_rate": 4.6513504700726293e-05, "loss": 0.065, "step": 8630, "task_loss": 0.19132231175899506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10476318001747131, "epoch": 8.2, "learning_rate": 4.650807515060291e-05, "loss": 0.1067, "step": 8631, "task_loss": 0.12431478500366211 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17144785821437836, "epoch": 8.2, "learning_rate": 4.650264169343411e-05, "loss": 0.1651, "step": 8632, "task_loss": 0.108132004737854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04371756315231323, "epoch": 8.2, "learning_rate": 4.6497204330206874e-05, "loss": 0.0494, "step": 8633, "task_loss": 0.10011433064937592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024459153413772583, "epoch": 8.2, "learning_rate": 4.649176306190895e-05, "loss": 0.0227, "step": 8634, "task_loss": 0.0073354970663785934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030715011060237885, "epoch": 8.2, "learning_rate": 4.648631788952874e-05, "loss": 0.0359, "step": 8635, "task_loss": 0.0821438580751419 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07763121277093887, "epoch": 8.2, "learning_rate": 4.6480868814055424e-05, "loss": 0.0733, "step": 8636, "task_loss": 0.0338205024600029 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029433991760015488, "epoch": 8.2, "learning_rate": 4.647541583647883e-05, "loss": 0.0364, "step": 8637, "task_loss": 0.09859539568424225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2271520048379898, "epoch": 8.2, "learning_rate": 4.646995895778952e-05, "loss": 0.2236, "step": 8638, "task_loss": 0.19156351685523987 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0869775339961052, "epoch": 8.2, "learning_rate": 4.646449817897876e-05, "loss": 0.0906, "step": 8639, "task_loss": 0.1231326162815094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024121176451444626, "epoch": 8.21, "learning_rate": 4.645903350103855e-05, "loss": 0.0275, "step": 8640, "task_loss": 0.058099135756492615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019324442371726036, "epoch": 8.21, "learning_rate": 4.6453564924961544e-05, "loss": 0.0184, "step": 8641, "task_loss": 0.010152887552976608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01817592792212963, "epoch": 8.21, "learning_rate": 4.644809245174114e-05, "loss": 0.0266, "step": 8642, "task_loss": 0.10227106511592865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02699386328458786, "epoch": 8.21, "learning_rate": 4.6442616082371466e-05, "loss": 0.0387, "step": 8643, "task_loss": 0.14438271522521973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0430140346288681, "epoch": 8.21, "learning_rate": 4.64371358178473e-05, "loss": 0.0394, "step": 8644, "task_loss": 0.006518969312310219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03487422317266464, "epoch": 8.21, "learning_rate": 4.6431651659164174e-05, "loss": 0.0456, "step": 8645, "task_loss": 0.14229948818683624 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027154915034770966, "epoch": 8.21, "learning_rate": 4.6426163607318305e-05, "loss": 0.0251, "step": 8646, "task_loss": 0.006713952869176865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03345032036304474, "epoch": 8.21, "learning_rate": 4.642067166330663e-05, "loss": 0.0407, "step": 8647, "task_loss": 0.10630976408720016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030117200687527657, "epoch": 8.21, "learning_rate": 4.6415175828126786e-05, "loss": 0.0276, "step": 8648, "task_loss": 0.004965195432305336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08309546858072281, "epoch": 8.21, "learning_rate": 4.640967610277711e-05, "loss": 0.0893, "step": 8649, "task_loss": 0.14526864886283875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10631243884563446, "epoch": 8.21, "learning_rate": 4.640417248825667e-05, "loss": 0.102, "step": 8650, "task_loss": 0.0636465847492218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04527665674686432, "epoch": 8.22, "learning_rate": 4.63986649855652e-05, "loss": 0.0411, "step": 8651, "task_loss": 0.003472359851002693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1377941519021988, "epoch": 8.22, "learning_rate": 4.639315359570319e-05, "loss": 0.1355, "step": 8652, "task_loss": 0.11462458968162537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03246547654271126, "epoch": 8.22, "learning_rate": 4.6387638319671786e-05, "loss": 0.0305, "step": 8653, "task_loss": 0.013004310429096222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028419774025678635, "epoch": 8.22, "learning_rate": 4.6382119158472895e-05, "loss": 0.0263, "step": 8654, "task_loss": 0.006932957097887993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017502637580037117, "epoch": 8.22, "learning_rate": 4.637659611310907e-05, "loss": 0.0174, "step": 8655, "task_loss": 0.01657920889556408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13791508972644806, "epoch": 8.22, "learning_rate": 4.637106918458361e-05, "loss": 0.1373, "step": 8656, "task_loss": 0.1319553405046463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.055541716516017914, "epoch": 8.22, "learning_rate": 4.636553837390051e-05, "loss": 0.0648, "step": 8657, "task_loss": 0.14844538271427155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10020337253808975, "epoch": 8.22, "learning_rate": 4.636000368206447e-05, "loss": 0.0989, "step": 8658, "task_loss": 0.08745051920413971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09979698061943054, "epoch": 8.22, "learning_rate": 4.6354465110080885e-05, "loss": 0.0986, "step": 8659, "task_loss": 0.08759049326181412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12549327313899994, "epoch": 8.22, "learning_rate": 4.6348922658955874e-05, "loss": 0.1235, "step": 8660, "task_loss": 0.10560569912195206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11727667599916458, "epoch": 8.23, "learning_rate": 4.634337632969624e-05, "loss": 0.1162, "step": 8661, "task_loss": 0.10662281513214111 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04554348438978195, "epoch": 8.23, "learning_rate": 4.6337826123309505e-05, "loss": 0.0446, "step": 8662, "task_loss": 0.035856109112501144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.052027251571416855, "epoch": 8.23, "learning_rate": 4.6332272040803895e-05, "loss": 0.0485, "step": 8663, "task_loss": 0.016855746507644653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0639544203877449, "epoch": 8.23, "learning_rate": 4.632671408318833e-05, "loss": 0.0608, "step": 8664, "task_loss": 0.03275006264448166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07394473999738693, "epoch": 8.23, "learning_rate": 4.6321152251472435e-05, "loss": 0.081, "step": 8665, "task_loss": 0.14465902745723724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07188697159290314, "epoch": 8.23, "learning_rate": 4.6315586546666556e-05, "loss": 0.0734, "step": 8666, "task_loss": 0.08725833147764206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023782558739185333, "epoch": 8.23, "learning_rate": 4.631001696978172e-05, "loss": 0.0278, "step": 8667, "task_loss": 0.06402748823165894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032316289842128754, "epoch": 8.23, "learning_rate": 4.630444352182968e-05, "loss": 0.0384, "step": 8668, "task_loss": 0.09317083656787872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030993333086371422, "epoch": 8.23, "learning_rate": 4.6298866203822865e-05, "loss": 0.0293, "step": 8669, "task_loss": 0.01442483440041542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11551865190267563, "epoch": 8.23, "learning_rate": 4.629328501677442e-05, "loss": 0.1106, "step": 8670, "task_loss": 0.06652466952800751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10263466089963913, "epoch": 8.23, "learning_rate": 4.6287699961698214e-05, "loss": 0.0992, "step": 8671, "task_loss": 0.06787226349115372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04704195261001587, "epoch": 8.24, "learning_rate": 4.6282111039608784e-05, "loss": 0.0512, "step": 8672, "task_loss": 0.08842041343450546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11540669202804565, "epoch": 8.24, "learning_rate": 4.6276518251521384e-05, "loss": 0.1093, "step": 8673, "task_loss": 0.05394323915243149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019262634217739105, "epoch": 8.24, "learning_rate": 4.6270921598451974e-05, "loss": 0.0268, "step": 8674, "task_loss": 0.09500335156917572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06018088012933731, "epoch": 8.24, "learning_rate": 4.6265321081417223e-05, "loss": 0.0591, "step": 8675, "task_loss": 0.049118801951408386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13014157116413116, "epoch": 8.24, "learning_rate": 4.625971670143447e-05, "loss": 0.1193, "step": 8676, "task_loss": 0.02204965241253376 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024654846638441086, "epoch": 8.24, "learning_rate": 4.625410845952181e-05, "loss": 0.023, "step": 8677, "task_loss": 0.007745366543531418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02617044746875763, "epoch": 8.24, "learning_rate": 4.6248496356697966e-05, "loss": 0.0267, "step": 8678, "task_loss": 0.03177504613995552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022578658536076546, "epoch": 8.24, "learning_rate": 4.6242880393982436e-05, "loss": 0.0278, "step": 8679, "task_loss": 0.07478535175323486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06433162093162537, "epoch": 8.24, "learning_rate": 4.623726057239537e-05, "loss": 0.0829, "step": 8680, "task_loss": 0.24952758848667145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028324808925390244, "epoch": 8.24, "learning_rate": 4.623163689295764e-05, "loss": 0.0271, "step": 8681, "task_loss": 0.01628146879374981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03707346320152283, "epoch": 8.25, "learning_rate": 4.6226009356690825e-05, "loss": 0.0343, "step": 8682, "task_loss": 0.009730465710163116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0707152709364891, "epoch": 8.25, "learning_rate": 4.622037796461718e-05, "loss": 0.0825, "step": 8683, "task_loss": 0.18847694993019104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015443956479430199, "epoch": 8.25, "learning_rate": 4.621474271775968e-05, "loss": 0.0152, "step": 8684, "task_loss": 0.013239886611700058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0768551155924797, "epoch": 8.25, "learning_rate": 4.620910361714199e-05, "loss": 0.0735, "step": 8685, "task_loss": 0.043199822306632996 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04744374752044678, "epoch": 8.25, "learning_rate": 4.620346066378849e-05, "loss": 0.0446, "step": 8686, "task_loss": 0.018627936020493507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03644431009888649, "epoch": 8.25, "learning_rate": 4.619781385872424e-05, "loss": 0.0407, "step": 8687, "task_loss": 0.07931828498840332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028153453022241592, "epoch": 8.25, "learning_rate": 4.6192163202975013e-05, "loss": 0.0388, "step": 8688, "task_loss": 0.1342998743057251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02188189886510372, "epoch": 8.25, "learning_rate": 4.618650869756728e-05, "loss": 0.0249, "step": 8689, "task_loss": 0.0519079715013504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1566876918077469, "epoch": 8.25, "learning_rate": 4.6180850343528205e-05, "loss": 0.1496, "step": 8690, "task_loss": 0.08567800372838974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04210164025425911, "epoch": 8.25, "learning_rate": 4.617518814188565e-05, "loss": 0.0436, "step": 8691, "task_loss": 0.0568234808743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019453078508377075, "epoch": 8.25, "learning_rate": 4.6169522093668196e-05, "loss": 0.0268, "step": 8692, "task_loss": 0.09267456084489822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028510289266705513, "epoch": 8.26, "learning_rate": 4.61638521999051e-05, "loss": 0.0422, "step": 8693, "task_loss": 0.16586299240589142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09608705341815948, "epoch": 8.26, "learning_rate": 4.6158178461626323e-05, "loss": 0.0922, "step": 8694, "task_loss": 0.057251427322626114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035421546548604965, "epoch": 8.26, "learning_rate": 4.615250087986254e-05, "loss": 0.0374, "step": 8695, "task_loss": 0.05525943636894226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03646400570869446, "epoch": 8.26, "learning_rate": 4.6146819455645086e-05, "loss": 0.0426, "step": 8696, "task_loss": 0.09792999178171158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08208326995372772, "epoch": 8.26, "learning_rate": 4.614113419000604e-05, "loss": 0.0778, "step": 8697, "task_loss": 0.0388401597738266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01685364544391632, "epoch": 8.26, "learning_rate": 4.613544508397815e-05, "loss": 0.0169, "step": 8698, "task_loss": 0.017076315358281136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02594437077641487, "epoch": 8.26, "learning_rate": 4.6129752138594874e-05, "loss": 0.0407, "step": 8699, "task_loss": 0.17328821122646332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02614816278219223, "epoch": 8.26, "learning_rate": 4.612405535489036e-05, "loss": 0.0368, "step": 8700, "task_loss": 0.1322673261165619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037083324044942856, "epoch": 8.26, "learning_rate": 4.611835473389945e-05, "loss": 0.034, "step": 8701, "task_loss": 0.005980234593153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019921965897083282, "epoch": 8.26, "learning_rate": 4.61126502766577e-05, "loss": 0.0183, "step": 8702, "task_loss": 0.0037397872656583786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07418224960565567, "epoch": 8.26, "learning_rate": 4.6106941984201344e-05, "loss": 0.0676, "step": 8703, "task_loss": 0.008437564596533775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0171127300709486, "epoch": 8.27, "learning_rate": 4.610122985756733e-05, "loss": 0.0281, "step": 8704, "task_loss": 0.12655523419380188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041877150535583496, "epoch": 8.27, "learning_rate": 4.609551389779328e-05, "loss": 0.0504, "step": 8705, "task_loss": 0.12753605842590332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09651815891265869, "epoch": 8.27, "learning_rate": 4.6089794105917544e-05, "loss": 0.0924, "step": 8706, "task_loss": 0.05498852580785751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026990918442606926, "epoch": 8.27, "learning_rate": 4.6084070482979135e-05, "loss": 0.0247, "step": 8707, "task_loss": 0.00403929129242897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01692625693976879, "epoch": 8.27, "learning_rate": 4.607834303001778e-05, "loss": 0.0239, "step": 8708, "task_loss": 0.08626563102006912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02325316146016121, "epoch": 8.27, "learning_rate": 4.60726117480739e-05, "loss": 0.0215, "step": 8709, "task_loss": 0.005591306835412979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012971121817827225, "epoch": 8.27, "learning_rate": 4.6066876638188604e-05, "loss": 0.0121, "step": 8710, "task_loss": 0.0038487426936626434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10507965087890625, "epoch": 8.27, "learning_rate": 4.606113770140371e-05, "loss": 0.1015, "step": 8711, "task_loss": 0.06916863471269608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04460505023598671, "epoch": 8.27, "learning_rate": 4.605539493876173e-05, "loss": 0.0626, "step": 8712, "task_loss": 0.224493145942688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13093966245651245, "epoch": 8.27, "learning_rate": 4.604964835130585e-05, "loss": 0.1438, "step": 8713, "task_loss": 0.2592054605484009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.054761841893196106, "epoch": 8.28, "learning_rate": 4.6043897940079964e-05, "loss": 0.0584, "step": 8714, "task_loss": 0.0908980518579483 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10678750276565552, "epoch": 8.28, "learning_rate": 4.603814370612867e-05, "loss": 0.1026, "step": 8715, "task_loss": 0.06508254259824753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04329557716846466, "epoch": 8.28, "learning_rate": 4.603238565049726e-05, "loss": 0.047, "step": 8716, "task_loss": 0.08073478937149048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01392638310790062, "epoch": 8.28, "learning_rate": 4.60266237742317e-05, "loss": 0.013, "step": 8717, "task_loss": 0.00464806891977787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1740546077489853, "epoch": 8.28, "learning_rate": 4.602085807837866e-05, "loss": 0.1735, "step": 8718, "task_loss": 0.1681438386440277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1236797422170639, "epoch": 8.28, "learning_rate": 4.601508856398552e-05, "loss": 0.1305, "step": 8719, "task_loss": 0.19167181849479675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043940044939517975, "epoch": 8.28, "learning_rate": 4.6009315232100324e-05, "loss": 0.0636, "step": 8720, "task_loss": 0.2409691959619522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09241293370723724, "epoch": 8.28, "learning_rate": 4.600353808377184e-05, "loss": 0.0951, "step": 8721, "task_loss": 0.119747593998909 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020489653572440147, "epoch": 8.28, "learning_rate": 4.599775712004951e-05, "loss": 0.0191, "step": 8722, "task_loss": 0.0063454341143369675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09082825481891632, "epoch": 8.28, "learning_rate": 4.599197234198347e-05, "loss": 0.0912, "step": 8723, "task_loss": 0.09437683969736099 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029831204563379288, "epoch": 8.28, "learning_rate": 4.5986183750624555e-05, "loss": 0.0506, "step": 8724, "task_loss": 0.23745323717594147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06204962357878685, "epoch": 8.29, "learning_rate": 4.5980391347024296e-05, "loss": 0.062, "step": 8725, "task_loss": 0.061779238283634186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03659776970744133, "epoch": 8.29, "learning_rate": 4.59745951322349e-05, "loss": 0.0351, "step": 8726, "task_loss": 0.02147502824664116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024865522980690002, "epoch": 8.29, "learning_rate": 4.596879510730929e-05, "loss": 0.0345, "step": 8727, "task_loss": 0.1214059591293335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04416849464178085, "epoch": 8.29, "learning_rate": 4.596299127330106e-05, "loss": 0.0599, "step": 8728, "task_loss": 0.20144742727279663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06300773471593857, "epoch": 8.29, "learning_rate": 4.59571836312645e-05, "loss": 0.0675, "step": 8729, "task_loss": 0.10772567242383957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03947564959526062, "epoch": 8.29, "learning_rate": 4.595137218225461e-05, "loss": 0.0452, "step": 8730, "task_loss": 0.09653180092573166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021150944754481316, "epoch": 8.29, "learning_rate": 4.594555692732706e-05, "loss": 0.0268, "step": 8731, "task_loss": 0.07796701788902283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17825676500797272, "epoch": 8.29, "learning_rate": 4.593973786753821e-05, "loss": 0.1831, "step": 8732, "task_loss": 0.22676941752433777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012715199962258339, "epoch": 8.29, "learning_rate": 4.593391500394514e-05, "loss": 0.0187, "step": 8733, "task_loss": 0.07283098250627518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13977791368961334, "epoch": 8.29, "learning_rate": 4.5928088337605586e-05, "loss": 0.1398, "step": 8734, "task_loss": 0.1397220343351364 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14722847938537598, "epoch": 8.3, "learning_rate": 4.5922257869578e-05, "loss": 0.1469, "step": 8735, "task_loss": 0.14371134340763092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15708275139331818, "epoch": 8.3, "learning_rate": 4.5916423600921496e-05, "loss": 0.1519, "step": 8736, "task_loss": 0.10510636121034622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057722412049770355, "epoch": 8.3, "learning_rate": 4.591058553269593e-05, "loss": 0.0712, "step": 8737, "task_loss": 0.1927632987499237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04139891266822815, "epoch": 8.3, "learning_rate": 4.590474366596178e-05, "loss": 0.0431, "step": 8738, "task_loss": 0.05822301283478737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021710364148020744, "epoch": 8.3, "learning_rate": 4.589889800178026e-05, "loss": 0.0253, "step": 8739, "task_loss": 0.05777839943766594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022335700690746307, "epoch": 8.3, "learning_rate": 4.589304854121329e-05, "loss": 0.0211, "step": 8740, "task_loss": 0.009688341990113258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026100587099790573, "epoch": 8.3, "learning_rate": 4.588719528532342e-05, "loss": 0.0327, "step": 8741, "task_loss": 0.09173382818698883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12068310379981995, "epoch": 8.3, "learning_rate": 4.588133823517392e-05, "loss": 0.115, "step": 8742, "task_loss": 0.06337210536003113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025651680305600166, "epoch": 8.3, "learning_rate": 4.587547739182878e-05, "loss": 0.0238, "step": 8743, "task_loss": 0.006972752511501312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11778855323791504, "epoch": 8.3, "learning_rate": 4.586961275635263e-05, "loss": 0.1207, "step": 8744, "task_loss": 0.1473734825849533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14205443859100342, "epoch": 8.3, "learning_rate": 4.586374432981081e-05, "loss": 0.1341, "step": 8745, "task_loss": 0.06203455477952957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018492717295885086, "epoch": 8.31, "learning_rate": 4.585787211326935e-05, "loss": 0.0172, "step": 8746, "task_loss": 0.0057936906814575195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10602115839719772, "epoch": 8.31, "learning_rate": 4.5851996107794975e-05, "loss": 0.1199, "step": 8747, "task_loss": 0.24453692138195038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022601492702960968, "epoch": 8.31, "learning_rate": 4.584611631445508e-05, "loss": 0.0279, "step": 8748, "task_loss": 0.07518120110034943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05565394088625908, "epoch": 8.31, "learning_rate": 4.5840232734317754e-05, "loss": 0.0597, "step": 8749, "task_loss": 0.09581439942121506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035259947180747986, "epoch": 8.31, "learning_rate": 4.583434536845179e-05, "loss": 0.0394, "step": 8750, "task_loss": 0.07706654071807861 }, { "epoch": 8.31, "eval_accuracy": 0.8956422018348624, "eval_loss": 0.4291454553604126, "eval_runtime": 17.9719, "eval_samples_per_second": 48.52, "eval_steps_per_second": 6.065, "step": 8750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06929297000169754, "epoch": 8.31, "learning_rate": 4.5828454217926654e-05, "loss": 0.0863, "step": 8751, "task_loss": 0.23925699293613434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06375081837177277, "epoch": 8.31, "learning_rate": 4.5822559283812496e-05, "loss": 0.0626, "step": 8752, "task_loss": 0.05265332758426666 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11460275948047638, "epoch": 8.31, "learning_rate": 4.581666056718016e-05, "loss": 0.1133, "step": 8753, "task_loss": 0.10144174098968506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0539216548204422, "epoch": 8.31, "learning_rate": 4.5810758069101175e-05, "loss": 0.052, "step": 8754, "task_loss": 0.034788183867931366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030059363692998886, "epoch": 8.31, "learning_rate": 4.580485179064777e-05, "loss": 0.0336, "step": 8755, "task_loss": 0.06580464541912079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1027214527130127, "epoch": 8.32, "learning_rate": 4.579894173289284e-05, "loss": 0.1106, "step": 8756, "task_loss": 0.18194906413555145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08241204172372818, "epoch": 8.32, "learning_rate": 4.579302789690997e-05, "loss": 0.0806, "step": 8757, "task_loss": 0.0640886202454567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016954604536294937, "epoch": 8.32, "learning_rate": 4.578711028377344e-05, "loss": 0.0219, "step": 8758, "task_loss": 0.06677691638469696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02651621401309967, "epoch": 8.32, "learning_rate": 4.578118889455821e-05, "loss": 0.0253, "step": 8759, "task_loss": 0.014431167393922806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03452229127287865, "epoch": 8.32, "learning_rate": 4.577526373033994e-05, "loss": 0.0328, "step": 8760, "task_loss": 0.017721977084875107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08960497379302979, "epoch": 8.32, "learning_rate": 4.576933479219496e-05, "loss": 0.0924, "step": 8761, "task_loss": 0.11729642748832703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07891800254583359, "epoch": 8.32, "learning_rate": 4.5763402081200294e-05, "loss": 0.0769, "step": 8762, "task_loss": 0.05859693884849548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02912452071905136, "epoch": 8.32, "learning_rate": 4.575746559843364e-05, "loss": 0.0273, "step": 8763, "task_loss": 0.010460572317242622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028819631785154343, "epoch": 8.32, "learning_rate": 4.5751525344973384e-05, "loss": 0.0329, "step": 8764, "task_loss": 0.06981261819601059 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05606947839260101, "epoch": 8.32, "learning_rate": 4.5745581321898615e-05, "loss": 0.0654, "step": 8765, "task_loss": 0.14961570501327515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019708652049303055, "epoch": 8.32, "learning_rate": 4.5739633530289085e-05, "loss": 0.0288, "step": 8766, "task_loss": 0.1107863336801529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043510615825653076, "epoch": 8.33, "learning_rate": 4.573368197122524e-05, "loss": 0.0518, "step": 8767, "task_loss": 0.12616392970085144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04988428205251694, "epoch": 8.33, "learning_rate": 4.572772664578821e-05, "loss": 0.0522, "step": 8768, "task_loss": 0.07312482595443726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047488268464803696, "epoch": 8.33, "learning_rate": 4.572176755505981e-05, "loss": 0.0509, "step": 8769, "task_loss": 0.08157598972320557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05487481504678726, "epoch": 8.33, "learning_rate": 4.571580470012254e-05, "loss": 0.0548, "step": 8770, "task_loss": 0.05453791469335556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02108554169535637, "epoch": 8.33, "learning_rate": 4.5709838082059574e-05, "loss": 0.0193, "step": 8771, "task_loss": 0.002928614616394043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09459959715604782, "epoch": 8.33, "learning_rate": 4.570386770195478e-05, "loss": 0.0858, "step": 8772, "task_loss": 0.006600510329008102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028554365038871765, "epoch": 8.33, "learning_rate": 4.569789356089271e-05, "loss": 0.0308, "step": 8773, "task_loss": 0.05108209699392319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051416561007499695, "epoch": 8.33, "learning_rate": 4.569191565995859e-05, "loss": 0.0613, "step": 8774, "task_loss": 0.15025511384010315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020000001415610313, "epoch": 8.33, "learning_rate": 4.568593400023834e-05, "loss": 0.0228, "step": 8775, "task_loss": 0.047657888382673264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03664660453796387, "epoch": 8.33, "learning_rate": 4.567994858281855e-05, "loss": 0.0338, "step": 8776, "task_loss": 0.008631901815533638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08228196948766708, "epoch": 8.34, "learning_rate": 4.56739594087865e-05, "loss": 0.0783, "step": 8777, "task_loss": 0.042297106236219406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035452522337436676, "epoch": 8.34, "learning_rate": 4.566796647923017e-05, "loss": 0.0449, "step": 8778, "task_loss": 0.13000640273094177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011653019115328789, "epoch": 8.34, "learning_rate": 4.566196979523818e-05, "loss": 0.0197, "step": 8779, "task_loss": 0.09172951430082321 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08766936510801315, "epoch": 8.34, "learning_rate": 4.5655969357899874e-05, "loss": 0.0842, "step": 8780, "task_loss": 0.05248169228434563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08307437598705292, "epoch": 8.34, "learning_rate": 4.564996516830525e-05, "loss": 0.0812, "step": 8781, "task_loss": 0.06468552350997925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01311748381704092, "epoch": 8.34, "learning_rate": 4.564395722754501e-05, "loss": 0.0121, "step": 8782, "task_loss": 0.0029160063713788986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022205941379070282, "epoch": 8.34, "learning_rate": 4.56379455367105e-05, "loss": 0.0262, "step": 8783, "task_loss": 0.061669085174798965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029047932475805283, "epoch": 8.34, "learning_rate": 4.563193009689381e-05, "loss": 0.0371, "step": 8784, "task_loss": 0.10945844650268555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019876539707183838, "epoch": 8.34, "learning_rate": 4.562591090918764e-05, "loss": 0.0275, "step": 8785, "task_loss": 0.09562841057777405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01689576543867588, "epoch": 8.34, "learning_rate": 4.561988797468542e-05, "loss": 0.0234, "step": 8786, "task_loss": 0.08180932700634003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023341480642557144, "epoch": 8.34, "learning_rate": 4.561386129448125e-05, "loss": 0.0305, "step": 8787, "task_loss": 0.09533084183931351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015885740518569946, "epoch": 8.35, "learning_rate": 4.5607830869669885e-05, "loss": 0.0212, "step": 8788, "task_loss": 0.06913591921329498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026201194152235985, "epoch": 8.35, "learning_rate": 4.560179670134681e-05, "loss": 0.025, "step": 8789, "task_loss": 0.013938097283244133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08920443058013916, "epoch": 8.35, "learning_rate": 4.559575879060813e-05, "loss": 0.0968, "step": 8790, "task_loss": 0.16520185768604279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1296762377023697, "epoch": 8.35, "learning_rate": 4.5589717138550685e-05, "loss": 0.1242, "step": 8791, "task_loss": 0.07465289533138275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016865486279129982, "epoch": 8.35, "learning_rate": 4.5583671746271964e-05, "loss": 0.0156, "step": 8792, "task_loss": 0.00429266132414341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030185125768184662, "epoch": 8.35, "learning_rate": 4.557762261487013e-05, "loss": 0.0461, "step": 8793, "task_loss": 0.18932506442070007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039688460528850555, "epoch": 8.35, "learning_rate": 4.557156974544404e-05, "loss": 0.037, "step": 8794, "task_loss": 0.013088133186101913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06738491356372833, "epoch": 8.35, "learning_rate": 4.5565513139093244e-05, "loss": 0.0756, "step": 8795, "task_loss": 0.1495150625705719 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05773467570543289, "epoch": 8.35, "learning_rate": 4.5559452796917936e-05, "loss": 0.0613, "step": 8796, "task_loss": 0.09331423789262772 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026063233613967896, "epoch": 8.35, "learning_rate": 4.555338872001901e-05, "loss": 0.0311, "step": 8797, "task_loss": 0.07620520889759064 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018153179436922073, "epoch": 8.36, "learning_rate": 4.554732090949805e-05, "loss": 0.0175, "step": 8798, "task_loss": 0.011946845799684525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03151557967066765, "epoch": 8.36, "learning_rate": 4.5541249366457276e-05, "loss": 0.0288, "step": 8799, "task_loss": 0.0040736570954322815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01914232410490513, "epoch": 8.36, "learning_rate": 4.5535174091999636e-05, "loss": 0.0221, "step": 8800, "task_loss": 0.04857531934976578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023833172395825386, "epoch": 8.36, "learning_rate": 4.552909508722871e-05, "loss": 0.0246, "step": 8801, "task_loss": 0.03120681270956993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021301060914993286, "epoch": 8.36, "learning_rate": 4.55230123532488e-05, "loss": 0.0197, "step": 8802, "task_loss": 0.005311897024512291 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034260865300893784, "epoch": 8.36, "learning_rate": 4.551692589116486e-05, "loss": 0.0544, "step": 8803, "task_loss": 0.23611676692962646 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036078743636608124, "epoch": 8.36, "learning_rate": 4.551083570208252e-05, "loss": 0.0371, "step": 8804, "task_loss": 0.0467095673084259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14268112182617188, "epoch": 8.36, "learning_rate": 4.550474178710809e-05, "loss": 0.1415, "step": 8805, "task_loss": 0.1312038004398346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014835448935627937, "epoch": 8.36, "learning_rate": 4.549864414734856e-05, "loss": 0.0139, "step": 8806, "task_loss": 0.005272580310702324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06314484775066376, "epoch": 8.36, "learning_rate": 4.54925427839116e-05, "loss": 0.0702, "step": 8807, "task_loss": 0.13396084308624268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09572551399469376, "epoch": 8.36, "learning_rate": 4.548643769790556e-05, "loss": 0.0963, "step": 8808, "task_loss": 0.10133033990859985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017699236050248146, "epoch": 8.37, "learning_rate": 4.548032889043944e-05, "loss": 0.0374, "step": 8809, "task_loss": 0.21495309472084045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05119137093424797, "epoch": 8.37, "learning_rate": 4.547421636262294e-05, "loss": 0.05, "step": 8810, "task_loss": 0.039462827146053314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024145277217030525, "epoch": 8.37, "learning_rate": 4.546810011556644e-05, "loss": 0.0334, "step": 8811, "task_loss": 0.11649482697248459 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10837046056985855, "epoch": 8.37, "learning_rate": 4.546198015038097e-05, "loss": 0.1182, "step": 8812, "task_loss": 0.20654115080833435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10691244900226593, "epoch": 8.37, "learning_rate": 4.545585646817826e-05, "loss": 0.1056, "step": 8813, "task_loss": 0.09399950504302979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10578563064336777, "epoch": 8.37, "learning_rate": 4.544972907007071e-05, "loss": 0.1143, "step": 8814, "task_loss": 0.1912904679775238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07613591104745865, "epoch": 8.37, "learning_rate": 4.544359795717139e-05, "loss": 0.0696, "step": 8815, "task_loss": 0.010670226067304611 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03094104677438736, "epoch": 8.37, "learning_rate": 4.543746313059404e-05, "loss": 0.0365, "step": 8816, "task_loss": 0.08633188903331757 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0631537064909935, "epoch": 8.37, "learning_rate": 4.5431324591453094e-05, "loss": 0.0812, "step": 8817, "task_loss": 0.2431773990392685 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034764595329761505, "epoch": 8.37, "learning_rate": 4.5425182340863626e-05, "loss": 0.0319, "step": 8818, "task_loss": 0.006121266633272171 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035581640899181366, "epoch": 8.38, "learning_rate": 4.541903637994142e-05, "loss": 0.033, "step": 8819, "task_loss": 0.009517015889286995 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08049920201301575, "epoch": 8.38, "learning_rate": 4.541288670980291e-05, "loss": 0.0904, "step": 8820, "task_loss": 0.17998726665973663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06685604900121689, "epoch": 8.38, "learning_rate": 4.540673333156523e-05, "loss": 0.0659, "step": 8821, "task_loss": 0.057141952216625214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0435507632791996, "epoch": 8.38, "learning_rate": 4.540057624634616e-05, "loss": 0.0404, "step": 8822, "task_loss": 0.01237713173031807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03415733203291893, "epoch": 8.38, "learning_rate": 4.5394415455264164e-05, "loss": 0.0395, "step": 8823, "task_loss": 0.0872575044631958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.060022905468940735, "epoch": 8.38, "learning_rate": 4.538825095943838e-05, "loss": 0.0712, "step": 8824, "task_loss": 0.17156316339969635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04664577543735504, "epoch": 8.38, "learning_rate": 4.538208275998861e-05, "loss": 0.05, "step": 8825, "task_loss": 0.08014590293169022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08388902246952057, "epoch": 8.38, "learning_rate": 4.537591085803535e-05, "loss": 0.0931, "step": 8826, "task_loss": 0.17568424344062805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02333504892885685, "epoch": 8.38, "learning_rate": 4.5369735254699754e-05, "loss": 0.0281, "step": 8827, "task_loss": 0.07060873508453369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07975588738918304, "epoch": 8.38, "learning_rate": 4.536355595110365e-05, "loss": 0.0783, "step": 8828, "task_loss": 0.06561661511659622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05120707303285599, "epoch": 8.38, "learning_rate": 4.5357372948369534e-05, "loss": 0.0468, "step": 8829, "task_loss": 0.007422303780913353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08561455458402634, "epoch": 8.39, "learning_rate": 4.535118624762057e-05, "loss": 0.0831, "step": 8830, "task_loss": 0.06040613353252411 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02236170694231987, "epoch": 8.39, "learning_rate": 4.534499584998062e-05, "loss": 0.0205, "step": 8831, "task_loss": 0.00412105955183506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07289911806583405, "epoch": 8.39, "learning_rate": 4.533880175657419e-05, "loss": 0.0812, "step": 8832, "task_loss": 0.15584085881710052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.103610560297966, "epoch": 8.39, "learning_rate": 4.533260396852646e-05, "loss": 0.1056, "step": 8833, "task_loss": 0.12309184670448303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1255352646112442, "epoch": 8.39, "learning_rate": 4.532640248696331e-05, "loss": 0.1261, "step": 8834, "task_loss": 0.1312541514635086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028518887236714363, "epoch": 8.39, "learning_rate": 4.532019731301125e-05, "loss": 0.028, "step": 8835, "task_loss": 0.02283020317554474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04137152433395386, "epoch": 8.39, "learning_rate": 4.531398844779749e-05, "loss": 0.0637, "step": 8836, "task_loss": 0.26472553610801697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13369058072566986, "epoch": 8.39, "learning_rate": 4.530777589244989e-05, "loss": 0.142, "step": 8837, "task_loss": 0.21714474260807037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07951021194458008, "epoch": 8.39, "learning_rate": 4.5301559648096995e-05, "loss": 0.0786, "step": 8838, "task_loss": 0.0705207884311676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08249984681606293, "epoch": 8.39, "learning_rate": 4.529533971586802e-05, "loss": 0.0865, "step": 8839, "task_loss": 0.12243272364139557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06132117658853531, "epoch": 8.4, "learning_rate": 4.5289116096892834e-05, "loss": 0.0566, "step": 8840, "task_loss": 0.014352064579725266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035899195820093155, "epoch": 8.4, "learning_rate": 4.5282888792302e-05, "loss": 0.0432, "step": 8841, "task_loss": 0.10917837917804718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03924690932035446, "epoch": 8.4, "learning_rate": 4.527665780322674e-05, "loss": 0.042, "step": 8842, "task_loss": 0.06675869226455688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08691535890102386, "epoch": 8.4, "learning_rate": 4.527042313079893e-05, "loss": 0.0855, "step": 8843, "task_loss": 0.07236799597740173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03491726517677307, "epoch": 8.4, "learning_rate": 4.526418477615114e-05, "loss": 0.0329, "step": 8844, "task_loss": 0.014603780582547188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026119505986571312, "epoch": 8.4, "learning_rate": 4.525794274041658e-05, "loss": 0.0403, "step": 8845, "task_loss": 0.16816593706607819 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025338448584079742, "epoch": 8.4, "learning_rate": 4.5251697024729165e-05, "loss": 0.0233, "step": 8846, "task_loss": 0.0048566292971372604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038359981030225754, "epoch": 8.4, "learning_rate": 4.524544763022346e-05, "loss": 0.0368, "step": 8847, "task_loss": 0.022422099485993385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.20507895946502686, "epoch": 8.4, "learning_rate": 4.523919455803468e-05, "loss": 0.1902, "step": 8848, "task_loss": 0.05593420937657356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02473161742091179, "epoch": 8.4, "learning_rate": 4.5232937809298734e-05, "loss": 0.0301, "step": 8849, "task_loss": 0.07794321328401566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01353430561721325, "epoch": 8.4, "learning_rate": 4.5226677385152206e-05, "loss": 0.0133, "step": 8850, "task_loss": 0.011057652533054352 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029346950352191925, "epoch": 8.41, "learning_rate": 4.522041328673231e-05, "loss": 0.0364, "step": 8851, "task_loss": 0.09976962208747864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09129992872476578, "epoch": 8.41, "learning_rate": 4.521414551517695e-05, "loss": 0.0922, "step": 8852, "task_loss": 0.10062223672866821 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0642009973526001, "epoch": 8.41, "learning_rate": 4.520787407162471e-05, "loss": 0.0722, "step": 8853, "task_loss": 0.14382028579711914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02546757459640503, "epoch": 8.41, "learning_rate": 4.520159895721483e-05, "loss": 0.0233, "step": 8854, "task_loss": 0.0041826870292425156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1266893744468689, "epoch": 8.41, "learning_rate": 4.51953201730872e-05, "loss": 0.1269, "step": 8855, "task_loss": 0.12863604724407196 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04428763687610626, "epoch": 8.41, "learning_rate": 4.51890377203824e-05, "loss": 0.0412, "step": 8856, "task_loss": 0.013243492692708969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008449184708297253, "epoch": 8.41, "learning_rate": 4.518275160024167e-05, "loss": 0.0082, "step": 8857, "task_loss": 0.006423516198992729 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08089472353458405, "epoch": 8.41, "learning_rate": 4.5176461813806904e-05, "loss": 0.0796, "step": 8858, "task_loss": 0.0684363842010498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03204871341586113, "epoch": 8.41, "learning_rate": 4.5170168362220686e-05, "loss": 0.0306, "step": 8859, "task_loss": 0.017244910821318626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04458446055650711, "epoch": 8.41, "learning_rate": 4.516387124662624e-05, "loss": 0.048, "step": 8860, "task_loss": 0.07903842628002167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01822434365749359, "epoch": 8.42, "learning_rate": 4.5157570468167464e-05, "loss": 0.0169, "step": 8861, "task_loss": 0.005464507266879082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03550713509321213, "epoch": 8.42, "learning_rate": 4.5151266027988946e-05, "loss": 0.0333, "step": 8862, "task_loss": 0.013436004519462585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028216678649187088, "epoch": 8.42, "learning_rate": 4.51449579272359e-05, "loss": 0.0333, "step": 8863, "task_loss": 0.07930795848369598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01838553324341774, "epoch": 8.42, "learning_rate": 4.5138646167054224e-05, "loss": 0.017, "step": 8864, "task_loss": 0.004559867084026337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08109049499034882, "epoch": 8.42, "learning_rate": 4.513233074859049e-05, "loss": 0.0859, "step": 8865, "task_loss": 0.12965066730976105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016361292451620102, "epoch": 8.42, "learning_rate": 4.512601167299191e-05, "loss": 0.0212, "step": 8866, "task_loss": 0.06487929821014404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011701793409883976, "epoch": 8.42, "learning_rate": 4.511968894140639e-05, "loss": 0.0264, "step": 8867, "task_loss": 0.15838071703910828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06448012590408325, "epoch": 8.42, "learning_rate": 4.511336255498247e-05, "loss": 0.0657, "step": 8868, "task_loss": 0.07717397809028625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028120938688516617, "epoch": 8.42, "learning_rate": 4.510703251486937e-05, "loss": 0.026, "step": 8869, "task_loss": 0.006957884877920151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10039125382900238, "epoch": 8.42, "learning_rate": 4.5100698822216984e-05, "loss": 0.1005, "step": 8870, "task_loss": 0.1014624685049057 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019519591704010963, "epoch": 8.42, "learning_rate": 4.509436147817585e-05, "loss": 0.018, "step": 8871, "task_loss": 0.0039330217987298965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.098009392619133, "epoch": 8.43, "learning_rate": 4.5088020483897184e-05, "loss": 0.102, "step": 8872, "task_loss": 0.13752353191375732 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09184578061103821, "epoch": 8.43, "learning_rate": 4.508167584053285e-05, "loss": 0.0878, "step": 8873, "task_loss": 0.05179011821746826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03356043994426727, "epoch": 8.43, "learning_rate": 4.507532754923537e-05, "loss": 0.0388, "step": 8874, "task_loss": 0.0859462320804596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08062370121479034, "epoch": 8.43, "learning_rate": 4.506897561115797e-05, "loss": 0.0896, "step": 8875, "task_loss": 0.17005644738674164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0661439374089241, "epoch": 8.43, "learning_rate": 4.506262002745449e-05, "loss": 0.0845, "step": 8876, "task_loss": 0.25010010600090027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04141310602426529, "epoch": 8.43, "learning_rate": 4.505626079927947e-05, "loss": 0.0399, "step": 8877, "task_loss": 0.025958728045225143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05498037487268448, "epoch": 8.43, "learning_rate": 4.504989792778808e-05, "loss": 0.0538, "step": 8878, "task_loss": 0.04316407069563866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05957046523690224, "epoch": 8.43, "learning_rate": 4.504353141413616e-05, "loss": 0.0686, "step": 8879, "task_loss": 0.14945709705352783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03215145319700241, "epoch": 8.43, "learning_rate": 4.5037161259480246e-05, "loss": 0.0435, "step": 8880, "task_loss": 0.14541016519069672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03889644145965576, "epoch": 8.43, "learning_rate": 4.5030787464977476e-05, "loss": 0.036, "step": 8881, "task_loss": 0.009727858006954193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028448857367038727, "epoch": 8.43, "learning_rate": 4.50244100317857e-05, "loss": 0.0262, "step": 8882, "task_loss": 0.005950525403022766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05188886076211929, "epoch": 8.44, "learning_rate": 4.5018028961063394e-05, "loss": 0.0695, "step": 8883, "task_loss": 0.22816702723503113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0323837585747242, "epoch": 8.44, "learning_rate": 4.501164425396973e-05, "loss": 0.0389, "step": 8884, "task_loss": 0.0973554477095604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06097978726029396, "epoch": 8.44, "learning_rate": 4.5005255911664507e-05, "loss": 0.0569, "step": 8885, "task_loss": 0.02063034474849701 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021500347182154655, "epoch": 8.44, "learning_rate": 4.49988639353082e-05, "loss": 0.0345, "step": 8886, "task_loss": 0.1510607749223709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.053475480526685715, "epoch": 8.44, "learning_rate": 4.4992468326061944e-05, "loss": 0.0527, "step": 8887, "task_loss": 0.04602964594960213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026177948340773582, "epoch": 8.44, "learning_rate": 4.498606908508754e-05, "loss": 0.0313, "step": 8888, "task_loss": 0.07776033878326416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1273716241121292, "epoch": 8.44, "learning_rate": 4.4979666213547414e-05, "loss": 0.1253, "step": 8889, "task_loss": 0.10640902817249298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028834780678153038, "epoch": 8.44, "learning_rate": 4.497325971260471e-05, "loss": 0.0271, "step": 8890, "task_loss": 0.011695507913827896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03636486828327179, "epoch": 8.44, "learning_rate": 4.496684958342319e-05, "loss": 0.0339, "step": 8891, "task_loss": 0.011735286563634872 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12257404625415802, "epoch": 8.44, "learning_rate": 4.4960435827167266e-05, "loss": 0.123, "step": 8892, "task_loss": 0.12667058408260345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027225444093346596, "epoch": 8.45, "learning_rate": 4.495401844500205e-05, "loss": 0.0297, "step": 8893, "task_loss": 0.051733896136283875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022315477952361107, "epoch": 8.45, "learning_rate": 4.494759743809329e-05, "loss": 0.025, "step": 8894, "task_loss": 0.04938486963510513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05692111700773239, "epoch": 8.45, "learning_rate": 4.494117280760739e-05, "loss": 0.0631, "step": 8895, "task_loss": 0.11854865401983261 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017605263739824295, "epoch": 8.45, "learning_rate": 4.49347445547114e-05, "loss": 0.0404, "step": 8896, "task_loss": 0.24594470858573914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13257010281085968, "epoch": 8.45, "learning_rate": 4.4928312680573064e-05, "loss": 0.1377, "step": 8897, "task_loss": 0.18380433320999146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0691106989979744, "epoch": 8.45, "learning_rate": 4.492187718636075e-05, "loss": 0.0788, "step": 8898, "task_loss": 0.16619594395160675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03699222952127457, "epoch": 8.45, "learning_rate": 4.49154380732435e-05, "loss": 0.048, "step": 8899, "task_loss": 0.14703547954559326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11394920945167542, "epoch": 8.45, "learning_rate": 4.490899534239101e-05, "loss": 0.1091, "step": 8900, "task_loss": 0.06529615074396133 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05552458390593529, "epoch": 8.45, "learning_rate": 4.490254899497364e-05, "loss": 0.0599, "step": 8901, "task_loss": 0.0996512770652771 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0950566753745079, "epoch": 8.45, "learning_rate": 4.4896099032162386e-05, "loss": 0.0994, "step": 8902, "task_loss": 0.13881003856658936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029427465051412582, "epoch": 8.45, "learning_rate": 4.488964545512892e-05, "loss": 0.0426, "step": 8903, "task_loss": 0.16067925095558167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04379139095544815, "epoch": 8.46, "learning_rate": 4.488318826504557e-05, "loss": 0.0431, "step": 8904, "task_loss": 0.03657126426696777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.053460512310266495, "epoch": 8.46, "learning_rate": 4.4876727463085324e-05, "loss": 0.0526, "step": 8905, "task_loss": 0.04512707516551018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036254942417144775, "epoch": 8.46, "learning_rate": 4.487026305042179e-05, "loss": 0.0371, "step": 8906, "task_loss": 0.045052558183670044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015025094151496887, "epoch": 8.46, "learning_rate": 4.4863795028229286e-05, "loss": 0.0215, "step": 8907, "task_loss": 0.07951157540082932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01650727353990078, "epoch": 8.46, "learning_rate": 4.4857323397682746e-05, "loss": 0.0306, "step": 8908, "task_loss": 0.15717414021492004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06591572612524033, "epoch": 8.46, "learning_rate": 4.485084815995778e-05, "loss": 0.064, "step": 8909, "task_loss": 0.04637327417731285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09855660796165466, "epoch": 8.46, "learning_rate": 4.484436931623064e-05, "loss": 0.0949, "step": 8910, "task_loss": 0.06240401789546013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027789302170276642, "epoch": 8.46, "learning_rate": 4.4837886867678245e-05, "loss": 0.0327, "step": 8911, "task_loss": 0.07687053829431534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07295016199350357, "epoch": 8.46, "learning_rate": 4.4831400815478164e-05, "loss": 0.0882, "step": 8912, "task_loss": 0.2257649004459381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0313107855618, "epoch": 8.46, "learning_rate": 4.482491116080861e-05, "loss": 0.0417, "step": 8913, "task_loss": 0.1352921426296234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020768383517861366, "epoch": 8.47, "learning_rate": 4.4818417904848466e-05, "loss": 0.036, "step": 8914, "task_loss": 0.17357057332992554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11295350641012192, "epoch": 8.47, "learning_rate": 4.481192104877726e-05, "loss": 0.1166, "step": 8915, "task_loss": 0.14907796680927277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021822160109877586, "epoch": 8.47, "learning_rate": 4.480542059377519e-05, "loss": 0.0222, "step": 8916, "task_loss": 0.025608519092202187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02147882990539074, "epoch": 8.47, "learning_rate": 4.479891654102307e-05, "loss": 0.02, "step": 8917, "task_loss": 0.006244117394089699 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05350108444690704, "epoch": 8.47, "learning_rate": 4.4792408891702426e-05, "loss": 0.0564, "step": 8918, "task_loss": 0.08280838280916214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09106861054897308, "epoch": 8.47, "learning_rate": 4.4785897646995376e-05, "loss": 0.0881, "step": 8919, "task_loss": 0.06174633651971817 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028143450617790222, "epoch": 8.47, "learning_rate": 4.477938280808473e-05, "loss": 0.0261, "step": 8920, "task_loss": 0.007531605660915375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028097132220864296, "epoch": 8.47, "learning_rate": 4.4772864376153936e-05, "loss": 0.0306, "step": 8921, "task_loss": 0.05265646427869797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029321586713194847, "epoch": 8.47, "learning_rate": 4.4766342352387106e-05, "loss": 0.027, "step": 8922, "task_loss": 0.006089037284255028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06918927282094955, "epoch": 8.47, "learning_rate": 4.475981673796899e-05, "loss": 0.0718, "step": 8923, "task_loss": 0.09525958448648453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07651045173406601, "epoch": 8.47, "learning_rate": 4.475328753408499e-05, "loss": 0.0909, "step": 8924, "task_loss": 0.22058546543121338 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12579286098480225, "epoch": 8.48, "learning_rate": 4.474675474192119e-05, "loss": 0.1404, "step": 8925, "task_loss": 0.2715792953968048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0216844342648983, "epoch": 8.48, "learning_rate": 4.4740218362664276e-05, "loss": 0.0204, "step": 8926, "task_loss": 0.008529262617230415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07038342207670212, "epoch": 8.48, "learning_rate": 4.473367839750165e-05, "loss": 0.0718, "step": 8927, "task_loss": 0.0848298892378807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059562116861343384, "epoch": 8.48, "learning_rate": 4.4727134847621276e-05, "loss": 0.0718, "step": 8928, "task_loss": 0.18145687878131866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07572756707668304, "epoch": 8.48, "learning_rate": 4.4720587714211863e-05, "loss": 0.0747, "step": 8929, "task_loss": 0.06551861763000488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06999503076076508, "epoch": 8.48, "learning_rate": 4.471403699846272e-05, "loss": 0.0659, "step": 8930, "task_loss": 0.0290969330817461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028632836416363716, "epoch": 8.48, "learning_rate": 4.470748270156381e-05, "loss": 0.0337, "step": 8931, "task_loss": 0.07966382801532745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10344910621643066, "epoch": 8.48, "learning_rate": 4.4700924824705745e-05, "loss": 0.1, "step": 8932, "task_loss": 0.06940016150474548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09662455320358276, "epoch": 8.48, "learning_rate": 4.469436336907982e-05, "loss": 0.0968, "step": 8933, "task_loss": 0.09881898760795593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08603687584400177, "epoch": 8.48, "learning_rate": 4.4687798335877936e-05, "loss": 0.0844, "step": 8934, "task_loss": 0.06977303326129913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05292690172791481, "epoch": 8.49, "learning_rate": 4.4681229726292664e-05, "loss": 0.0519, "step": 8935, "task_loss": 0.04311305657029152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019087469205260277, "epoch": 8.49, "learning_rate": 4.4674657541517227e-05, "loss": 0.0191, "step": 8936, "task_loss": 0.018771233037114143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01726618967950344, "epoch": 8.49, "learning_rate": 4.466808178274549e-05, "loss": 0.0225, "step": 8937, "task_loss": 0.06974489986896515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07852751016616821, "epoch": 8.49, "learning_rate": 4.4661502451171975e-05, "loss": 0.0891, "step": 8938, "task_loss": 0.18472985923290253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015991097316145897, "epoch": 8.49, "learning_rate": 4.465491954799186e-05, "loss": 0.0148, "step": 8939, "task_loss": 0.004459971562027931 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07318315654993057, "epoch": 8.49, "learning_rate": 4.4648333074400936e-05, "loss": 0.0713, "step": 8940, "task_loss": 0.054741598665714264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04243193194270134, "epoch": 8.49, "learning_rate": 4.464174303159569e-05, "loss": 0.0503, "step": 8941, "task_loss": 0.12083330750465393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.18897034227848053, "epoch": 8.49, "learning_rate": 4.463514942077323e-05, "loss": 0.1905, "step": 8942, "task_loss": 0.20461271703243256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09098904579877853, "epoch": 8.49, "learning_rate": 4.4628552243131304e-05, "loss": 0.0979, "step": 8943, "task_loss": 0.16002698242664337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021895185112953186, "epoch": 8.49, "learning_rate": 4.462195149986833e-05, "loss": 0.0259, "step": 8944, "task_loss": 0.061538372188806534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05251630023121834, "epoch": 8.49, "learning_rate": 4.4615347192183375e-05, "loss": 0.0511, "step": 8945, "task_loss": 0.038724854588508606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0883500799536705, "epoch": 8.5, "learning_rate": 4.4608739321276126e-05, "loss": 0.0855, "step": 8946, "task_loss": 0.060159243643283844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0961025282740593, "epoch": 8.5, "learning_rate": 4.4602127888346944e-05, "loss": 0.0918, "step": 8947, "task_loss": 0.05281839519739151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07368957996368408, "epoch": 8.5, "learning_rate": 4.459551289459684e-05, "loss": 0.0714, "step": 8948, "task_loss": 0.05031800642609596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06434916704893112, "epoch": 8.5, "learning_rate": 4.4588894341227426e-05, "loss": 0.0588, "step": 8949, "task_loss": 0.00913977436721325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04223328083753586, "epoch": 8.5, "learning_rate": 4.4582272229441024e-05, "loss": 0.0406, "step": 8950, "task_loss": 0.02632623352110386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13263371586799622, "epoch": 8.5, "learning_rate": 4.457564656044056e-05, "loss": 0.1389, "step": 8951, "task_loss": 0.1952933669090271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019756224006414413, "epoch": 8.5, "learning_rate": 4.456901733542962e-05, "loss": 0.0185, "step": 8952, "task_loss": 0.007240481674671173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1520935297012329, "epoch": 8.5, "learning_rate": 4.4562384555612436e-05, "loss": 0.1599, "step": 8953, "task_loss": 0.2299913763999939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10879489779472351, "epoch": 8.5, "learning_rate": 4.455574822219388e-05, "loss": 0.1219, "step": 8954, "task_loss": 0.23940874636173248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0421454980969429, "epoch": 8.5, "learning_rate": 4.454910833637949e-05, "loss": 0.0453, "step": 8955, "task_loss": 0.07326261699199677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1279737502336502, "epoch": 8.51, "learning_rate": 4.454246489937541e-05, "loss": 0.1322, "step": 8956, "task_loss": 0.1699049174785614 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1273374855518341, "epoch": 8.51, "learning_rate": 4.4535817912388466e-05, "loss": 0.1269, "step": 8957, "task_loss": 0.12270753085613251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03381875157356262, "epoch": 8.51, "learning_rate": 4.4529167376626116e-05, "loss": 0.061, "step": 8958, "task_loss": 0.3059147894382477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035635169595479965, "epoch": 8.51, "learning_rate": 4.4522513293296456e-05, "loss": 0.0395, "step": 8959, "task_loss": 0.07383869588375092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08485317975282669, "epoch": 8.51, "learning_rate": 4.451585566360823e-05, "loss": 0.0817, "step": 8960, "task_loss": 0.053764086216688156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041285235434770584, "epoch": 8.51, "learning_rate": 4.450919448877084e-05, "loss": 0.0389, "step": 8961, "task_loss": 0.017382560297846794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01698409393429756, "epoch": 8.51, "learning_rate": 4.4502529769994314e-05, "loss": 0.0262, "step": 8962, "task_loss": 0.10962288081645966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15635554492473602, "epoch": 8.51, "learning_rate": 4.449586150848934e-05, "loss": 0.157, "step": 8963, "task_loss": 0.16323071718215942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016910862177610397, "epoch": 8.51, "learning_rate": 4.44891897054672e-05, "loss": 0.0221, "step": 8964, "task_loss": 0.06874377280473709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018827490508556366, "epoch": 8.51, "learning_rate": 4.4482514362139915e-05, "loss": 0.0173, "step": 8965, "task_loss": 0.003253905102610588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044889599084854126, "epoch": 8.51, "learning_rate": 4.4475835479720065e-05, "loss": 0.0419, "step": 8966, "task_loss": 0.014503007754683495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027362622320652008, "epoch": 8.52, "learning_rate": 4.4469153059420895e-05, "loss": 0.0301, "step": 8967, "task_loss": 0.054887376725673676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021328095346689224, "epoch": 8.52, "learning_rate": 4.4462467102456305e-05, "loss": 0.0198, "step": 8968, "task_loss": 0.005754020065069199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03476056456565857, "epoch": 8.52, "learning_rate": 4.4455777610040846e-05, "loss": 0.0323, "step": 8969, "task_loss": 0.010109812021255493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09709491580724716, "epoch": 8.52, "learning_rate": 4.444908458338968e-05, "loss": 0.0945, "step": 8970, "task_loss": 0.07128534466028214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037777457386255264, "epoch": 8.52, "learning_rate": 4.4442388023718624e-05, "loss": 0.0562, "step": 8971, "task_loss": 0.2219763994216919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05716599524021149, "epoch": 8.52, "learning_rate": 4.443568793224415e-05, "loss": 0.0549, "step": 8972, "task_loss": 0.03474997729063034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03178703784942627, "epoch": 8.52, "learning_rate": 4.4428984310183364e-05, "loss": 0.0331, "step": 8973, "task_loss": 0.04533195123076439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06467156112194061, "epoch": 8.52, "learning_rate": 4.4422277158754005e-05, "loss": 0.0693, "step": 8974, "task_loss": 0.11103697121143341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1044667512178421, "epoch": 8.52, "learning_rate": 4.441556647917446e-05, "loss": 0.106, "step": 8975, "task_loss": 0.11955499649047852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06567876040935516, "epoch": 8.52, "learning_rate": 4.440885227266376e-05, "loss": 0.0679, "step": 8976, "task_loss": 0.08836454898118973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09253351390361786, "epoch": 8.53, "learning_rate": 4.440213454044158e-05, "loss": 0.0892, "step": 8977, "task_loss": 0.05876833572983742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02796206995844841, "epoch": 8.53, "learning_rate": 4.43954132837282e-05, "loss": 0.0414, "step": 8978, "task_loss": 0.16239336133003235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07454772293567657, "epoch": 8.53, "learning_rate": 4.43886885037446e-05, "loss": 0.0779, "step": 8979, "task_loss": 0.10792988538742065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08012815564870834, "epoch": 8.53, "learning_rate": 4.438196020171235e-05, "loss": 0.0856, "step": 8980, "task_loss": 0.13467732071876526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06739650666713715, "epoch": 8.53, "learning_rate": 4.437522837885369e-05, "loss": 0.0743, "step": 8981, "task_loss": 0.13656720519065857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039198122918605804, "epoch": 8.53, "learning_rate": 4.436849303639148e-05, "loss": 0.0449, "step": 8982, "task_loss": 0.09616616368293762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02301427721977234, "epoch": 8.53, "learning_rate": 4.436175417554923e-05, "loss": 0.0213, "step": 8983, "task_loss": 0.005967108532786369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025122862309217453, "epoch": 8.53, "learning_rate": 4.4355011797551086e-05, "loss": 0.0323, "step": 8984, "task_loss": 0.09659717977046967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06348798424005508, "epoch": 8.53, "learning_rate": 4.4348265903621844e-05, "loss": 0.0593, "step": 8985, "task_loss": 0.02176908776164055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024741999804973602, "epoch": 8.53, "learning_rate": 4.4341516494986904e-05, "loss": 0.029, "step": 8986, "task_loss": 0.06750153005123138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022810565307736397, "epoch": 8.53, "learning_rate": 4.433476357287235e-05, "loss": 0.029, "step": 8987, "task_loss": 0.08470144867897034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03231749311089516, "epoch": 8.54, "learning_rate": 4.432800713850488e-05, "loss": 0.04, "step": 8988, "task_loss": 0.10918974876403809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08315695822238922, "epoch": 8.54, "learning_rate": 4.432124719311182e-05, "loss": 0.0804, "step": 8989, "task_loss": 0.056007783859968185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07280036807060242, "epoch": 8.54, "learning_rate": 4.431448373792116e-05, "loss": 0.0704, "step": 8990, "task_loss": 0.048897065222263336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023964036256074905, "epoch": 8.54, "learning_rate": 4.430771677416151e-05, "loss": 0.0321, "step": 8991, "task_loss": 0.10551959276199341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012696181423962116, "epoch": 8.54, "learning_rate": 4.430094630306212e-05, "loss": 0.0121, "step": 8992, "task_loss": 0.006786322221159935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06884432584047318, "epoch": 8.54, "learning_rate": 4.429417232585288e-05, "loss": 0.0656, "step": 8993, "task_loss": 0.036455415189266205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03897576034069061, "epoch": 8.54, "learning_rate": 4.428739484376431e-05, "loss": 0.0436, "step": 8994, "task_loss": 0.08556883037090302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018597451969981194, "epoch": 8.54, "learning_rate": 4.4280613858027584e-05, "loss": 0.0173, "step": 8995, "task_loss": 0.005144355818629265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02616756409406662, "epoch": 8.54, "learning_rate": 4.427382936987449e-05, "loss": 0.0291, "step": 8996, "task_loss": 0.05589302256703377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02272038534283638, "epoch": 8.54, "learning_rate": 4.426704138053747e-05, "loss": 0.0209, "step": 8997, "task_loss": 0.004764288663864136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017667189240455627, "epoch": 8.55, "learning_rate": 4.426024989124959e-05, "loss": 0.0162, "step": 8998, "task_loss": 0.00283648818731308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09155499935150146, "epoch": 8.55, "learning_rate": 4.425345490324456e-05, "loss": 0.0864, "step": 8999, "task_loss": 0.04047724977135658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0898907482624054, "epoch": 8.55, "learning_rate": 4.424665641775673e-05, "loss": 0.0954, "step": 9000, "task_loss": 0.14472734928131104 }, { "epoch": 8.55, "eval_accuracy": 0.893348623853211, "eval_loss": 0.4816896319389343, "eval_runtime": 18.2944, "eval_samples_per_second": 47.665, "eval_steps_per_second": 5.958, "step": 9000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1226925477385521, "epoch": 8.55, "learning_rate": 4.4239854436021056e-05, "loss": 0.1208, "step": 9001, "task_loss": 0.10388394445180893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07912658900022507, "epoch": 8.55, "learning_rate": 4.423304895927317e-05, "loss": 0.0727, "step": 9002, "task_loss": 0.014653431251645088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07628723978996277, "epoch": 8.55, "learning_rate": 4.4226239988749305e-05, "loss": 0.0717, "step": 9003, "task_loss": 0.030388563871383667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17722564935684204, "epoch": 8.55, "learning_rate": 4.4219427525686366e-05, "loss": 0.1661, "step": 9004, "task_loss": 0.06582056730985641 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01308794878423214, "epoch": 8.55, "learning_rate": 4.421261157132185e-05, "loss": 0.0245, "step": 9005, "task_loss": 0.1268969178199768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06603103131055832, "epoch": 8.55, "learning_rate": 4.4205792126893905e-05, "loss": 0.0753, "step": 9006, "task_loss": 0.15897931158542633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.033943966031074524, "epoch": 8.55, "learning_rate": 4.4198969193641324e-05, "loss": 0.0317, "step": 9007, "task_loss": 0.01145954243838787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09488087147474289, "epoch": 8.55, "learning_rate": 4.4192142772803535e-05, "loss": 0.0931, "step": 9008, "task_loss": 0.07663992047309875 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04525774344801903, "epoch": 8.56, "learning_rate": 4.4185312865620575e-05, "loss": 0.0469, "step": 9009, "task_loss": 0.06158378720283508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022823676466941833, "epoch": 8.56, "learning_rate": 4.417847947333314e-05, "loss": 0.0212, "step": 9010, "task_loss": 0.006658636033535004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017958180978894234, "epoch": 8.56, "learning_rate": 4.417164259718254e-05, "loss": 0.0167, "step": 9011, "task_loss": 0.005114752799272537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12965308129787445, "epoch": 8.56, "learning_rate": 4.416480223841073e-05, "loss": 0.1233, "step": 9012, "task_loss": 0.06576710194349289 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08245688676834106, "epoch": 8.56, "learning_rate": 4.4157958398260294e-05, "loss": 0.0786, "step": 9013, "task_loss": 0.04379882290959358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012385990470647812, "epoch": 8.56, "learning_rate": 4.415111107797445e-05, "loss": 0.0117, "step": 9014, "task_loss": 0.005246100947260857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08853226900100708, "epoch": 8.56, "learning_rate": 4.414426027879705e-05, "loss": 0.0933, "step": 9015, "task_loss": 0.13666236400604248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06031012162566185, "epoch": 8.56, "learning_rate": 4.413740600197257e-05, "loss": 0.0607, "step": 9016, "task_loss": 0.06431614607572556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044511131942272186, "epoch": 8.56, "learning_rate": 4.413054824874612e-05, "loss": 0.062, "step": 9017, "task_loss": 0.21939462423324585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039623238146305084, "epoch": 8.56, "learning_rate": 4.412368702036345e-05, "loss": 0.0482, "step": 9018, "task_loss": 0.12542547285556793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2884577512741089, "epoch": 8.57, "learning_rate": 4.4116822318070925e-05, "loss": 0.2897, "step": 9019, "task_loss": 0.3008236885070801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044239118695259094, "epoch": 8.57, "learning_rate": 4.4109954143115565e-05, "loss": 0.0403, "step": 9020, "task_loss": 0.004376189783215523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01869271881878376, "epoch": 8.57, "learning_rate": 4.4103082496745e-05, "loss": 0.0232, "step": 9021, "task_loss": 0.06380611658096313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0486907958984375, "epoch": 8.57, "learning_rate": 4.40962073802075e-05, "loss": 0.0468, "step": 9022, "task_loss": 0.03009847365319729 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01841585896909237, "epoch": 8.57, "learning_rate": 4.4089328794751954e-05, "loss": 0.0171, "step": 9023, "task_loss": 0.005129978060722351 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0398845374584198, "epoch": 8.57, "learning_rate": 4.4082446741627906e-05, "loss": 0.0362, "step": 9024, "task_loss": 0.003042936325073242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.21794645488262177, "epoch": 8.57, "learning_rate": 4.40755612220855e-05, "loss": 0.2127, "step": 9025, "task_loss": 0.165152907371521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07094389200210571, "epoch": 8.57, "learning_rate": 4.406867223737553e-05, "loss": 0.0752, "step": 9026, "task_loss": 0.11325624585151672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10760428756475449, "epoch": 8.57, "learning_rate": 4.406177978874941e-05, "loss": 0.1094, "step": 9027, "task_loss": 0.1256953477859497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07674893736839294, "epoch": 8.57, "learning_rate": 4.405488387745919e-05, "loss": 0.0779, "step": 9028, "task_loss": 0.08849011361598969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05093570053577423, "epoch": 8.57, "learning_rate": 4.4047984504757544e-05, "loss": 0.0768, "step": 9029, "task_loss": 0.3099355101585388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02506938949227333, "epoch": 8.58, "learning_rate": 4.4041081671897775e-05, "loss": 0.0271, "step": 9030, "task_loss": 0.04571164399385452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018875867128372192, "epoch": 8.58, "learning_rate": 4.403417538013382e-05, "loss": 0.0319, "step": 9031, "task_loss": 0.14952301979064941 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02852119505405426, "epoch": 8.58, "learning_rate": 4.402726563072024e-05, "loss": 0.0358, "step": 9032, "task_loss": 0.10161833465099335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022057583555579185, "epoch": 8.58, "learning_rate": 4.4020352424912226e-05, "loss": 0.0208, "step": 9033, "task_loss": 0.009751364588737488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009804684668779373, "epoch": 8.58, "learning_rate": 4.401343576396558e-05, "loss": 0.0092, "step": 9034, "task_loss": 0.0038125887513160706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05322907119989395, "epoch": 8.58, "learning_rate": 4.400651564913676e-05, "loss": 0.053, "step": 9035, "task_loss": 0.05118084326386452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01981445588171482, "epoch": 8.58, "learning_rate": 4.399959208168284e-05, "loss": 0.0281, "step": 9036, "task_loss": 0.10224159061908722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03673945739865303, "epoch": 8.58, "learning_rate": 4.3992665062861514e-05, "loss": 0.0404, "step": 9037, "task_loss": 0.07305504381656647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11148297786712646, "epoch": 8.58, "learning_rate": 4.398573459393111e-05, "loss": 0.1081, "step": 9038, "task_loss": 0.07762963324785233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11146679520606995, "epoch": 8.58, "learning_rate": 4.3978800676150575e-05, "loss": 0.108, "step": 9039, "task_loss": 0.0765286237001419 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03709033504128456, "epoch": 8.58, "learning_rate": 4.39718633107795e-05, "loss": 0.0438, "step": 9040, "task_loss": 0.10385788232088089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038635239005088806, "epoch": 8.59, "learning_rate": 4.3964922499078084e-05, "loss": 0.0372, "step": 9041, "task_loss": 0.02382836863398552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051188208162784576, "epoch": 8.59, "learning_rate": 4.3957978242307166e-05, "loss": 0.0502, "step": 9042, "task_loss": 0.04129528999328613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028153028339147568, "epoch": 8.59, "learning_rate": 4.395103054172819e-05, "loss": 0.0258, "step": 9043, "task_loss": 0.004962790757417679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022645195946097374, "epoch": 8.59, "learning_rate": 4.394407939860325e-05, "loss": 0.0212, "step": 9044, "task_loss": 0.008643986657261848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013338697142899036, "epoch": 8.59, "learning_rate": 4.3937124814195054e-05, "loss": 0.0125, "step": 9045, "task_loss": 0.004968065768480301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05772208422422409, "epoch": 8.59, "learning_rate": 4.393016678976692e-05, "loss": 0.0609, "step": 9046, "task_loss": 0.08959686756134033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03842834755778313, "epoch": 8.59, "learning_rate": 4.3923205326582837e-05, "loss": 0.0449, "step": 9047, "task_loss": 0.10268554091453552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03277190029621124, "epoch": 8.59, "learning_rate": 4.3916240425907364e-05, "loss": 0.0405, "step": 9048, "task_loss": 0.11014024913311005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020059367641806602, "epoch": 8.59, "learning_rate": 4.3909272089005714e-05, "loss": 0.0194, "step": 9049, "task_loss": 0.01322341151535511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2156301587820053, "epoch": 8.59, "learning_rate": 4.3902300317143726e-05, "loss": 0.2129, "step": 9050, "task_loss": 0.18819431960582733 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02864878438413143, "epoch": 8.6, "learning_rate": 4.389532511158785e-05, "loss": 0.0264, "step": 9051, "task_loss": 0.006369665265083313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04067389667034149, "epoch": 8.6, "learning_rate": 4.388834647360516e-05, "loss": 0.0411, "step": 9052, "task_loss": 0.045049235224723816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019816577434539795, "epoch": 8.6, "learning_rate": 4.388136440446337e-05, "loss": 0.0244, "step": 9053, "task_loss": 0.06586334109306335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018740810453891754, "epoch": 8.6, "learning_rate": 4.387437890543081e-05, "loss": 0.0203, "step": 9054, "task_loss": 0.0338791161775589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03478642553091049, "epoch": 8.6, "learning_rate": 4.3867389977776416e-05, "loss": 0.0369, "step": 9055, "task_loss": 0.056206680834293365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06599288433790207, "epoch": 8.6, "learning_rate": 4.3860397622769756e-05, "loss": 0.0698, "step": 9056, "task_loss": 0.10445068776607513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06809773296117783, "epoch": 8.6, "learning_rate": 4.3853401841681046e-05, "loss": 0.0656, "step": 9057, "task_loss": 0.04264071583747864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0191783644258976, "epoch": 8.6, "learning_rate": 4.3846402635781093e-05, "loss": 0.0178, "step": 9058, "task_loss": 0.005517646670341492 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020288830623030663, "epoch": 8.6, "learning_rate": 4.3839400006341335e-05, "loss": 0.0208, "step": 9059, "task_loss": 0.025135664269328117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02526734210550785, "epoch": 8.6, "learning_rate": 4.383239395463383e-05, "loss": 0.0395, "step": 9060, "task_loss": 0.16764724254608154 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01698536053299904, "epoch": 8.6, "learning_rate": 4.382538448193127e-05, "loss": 0.0157, "step": 9061, "task_loss": 0.0042114946991205215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17208489775657654, "epoch": 8.61, "learning_rate": 4.381837158950695e-05, "loss": 0.179, "step": 9062, "task_loss": 0.2413436472415924 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03591246157884598, "epoch": 8.61, "learning_rate": 4.3811355278634804e-05, "loss": 0.0522, "step": 9063, "task_loss": 0.19905783236026764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03541406989097595, "epoch": 8.61, "learning_rate": 4.380433555058937e-05, "loss": 0.0331, "step": 9064, "task_loss": 0.011965034529566765 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1491689682006836, "epoch": 8.61, "learning_rate": 4.379731240664583e-05, "loss": 0.163, "step": 9065, "task_loss": 0.2871720790863037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015187927521765232, "epoch": 8.61, "learning_rate": 4.379028584807996e-05, "loss": 0.0142, "step": 9066, "task_loss": 0.005585832521319389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031154222786426544, "epoch": 8.61, "learning_rate": 4.3783255876168165e-05, "loss": 0.0329, "step": 9067, "task_loss": 0.04878431186079979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12299706786870956, "epoch": 8.61, "learning_rate": 4.377622249218748e-05, "loss": 0.135, "step": 9068, "task_loss": 0.24260729551315308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03098423220217228, "epoch": 8.61, "learning_rate": 4.376918569741556e-05, "loss": 0.0388, "step": 9069, "task_loss": 0.1092497706413269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02339218370616436, "epoch": 8.61, "learning_rate": 4.376214549313066e-05, "loss": 0.0218, "step": 9070, "task_loss": 0.007723584771156311 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0801115334033966, "epoch": 8.61, "learning_rate": 4.375510188061167e-05, "loss": 0.0918, "step": 9071, "task_loss": 0.1967240571975708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0836428701877594, "epoch": 8.62, "learning_rate": 4.37480548611381e-05, "loss": 0.0883, "step": 9072, "task_loss": 0.1297881305217743 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0943288654088974, "epoch": 8.62, "learning_rate": 4.374100443599007e-05, "loss": 0.0906, "step": 9073, "task_loss": 0.0567990243434906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07607637345790863, "epoch": 8.62, "learning_rate": 4.3733950606448324e-05, "loss": 0.0873, "step": 9074, "task_loss": 0.18788626790046692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11874774098396301, "epoch": 8.62, "learning_rate": 4.3726893373794234e-05, "loss": 0.1183, "step": 9075, "task_loss": 0.11419327557086945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021572759374976158, "epoch": 8.62, "learning_rate": 4.3719832739309766e-05, "loss": 0.0243, "step": 9076, "task_loss": 0.04849618673324585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026811379939317703, "epoch": 8.62, "learning_rate": 4.371276870427753e-05, "loss": 0.0278, "step": 9077, "task_loss": 0.03716592118144035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008828360587358475, "epoch": 8.62, "learning_rate": 4.3705701269980734e-05, "loss": 0.0082, "step": 9078, "task_loss": 0.0030191540718078613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05028454214334488, "epoch": 8.62, "learning_rate": 4.369863043770322e-05, "loss": 0.0475, "step": 9079, "task_loss": 0.022545762360095978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047442540526390076, "epoch": 8.62, "learning_rate": 4.369155620872943e-05, "loss": 0.0435, "step": 9080, "task_loss": 0.007748594507575035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014092406257987022, "epoch": 8.62, "learning_rate": 4.3684478584344433e-05, "loss": 0.0131, "step": 9081, "task_loss": 0.004466302692890167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07513783872127533, "epoch": 8.62, "learning_rate": 4.367739756583392e-05, "loss": 0.0833, "step": 9082, "task_loss": 0.1565927267074585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019305624067783356, "epoch": 8.63, "learning_rate": 4.367031315448419e-05, "loss": 0.018, "step": 9083, "task_loss": 0.006342671811580658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0106560830026865, "epoch": 8.63, "learning_rate": 4.366322535158215e-05, "loss": 0.014, "step": 9084, "task_loss": 0.043936390429735184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06862330436706543, "epoch": 8.63, "learning_rate": 4.3656134158415344e-05, "loss": 0.0769, "step": 9085, "task_loss": 0.15090253949165344 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032704662531614304, "epoch": 8.63, "learning_rate": 4.364903957627192e-05, "loss": 0.0301, "step": 9086, "task_loss": 0.006765572354197502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022231170907616615, "epoch": 8.63, "learning_rate": 4.3641941606440644e-05, "loss": 0.0292, "step": 9087, "task_loss": 0.09189935028553009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15758824348449707, "epoch": 8.63, "learning_rate": 4.36348402502109e-05, "loss": 0.1631, "step": 9088, "task_loss": 0.212388277053833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018683135509490967, "epoch": 8.63, "learning_rate": 4.3627735508872666e-05, "loss": 0.0174, "step": 9089, "task_loss": 0.0056790560483932495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03444650024175644, "epoch": 8.63, "learning_rate": 4.362062738371657e-05, "loss": 0.0375, "step": 9090, "task_loss": 0.06486361473798752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032534707337617874, "epoch": 8.63, "learning_rate": 4.361351587603384e-05, "loss": 0.0591, "step": 9091, "task_loss": 0.29826819896698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02512388862669468, "epoch": 8.63, "learning_rate": 4.360640098711629e-05, "loss": 0.0234, "step": 9092, "task_loss": 0.007688479498028755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.053498346358537674, "epoch": 8.64, "learning_rate": 4.3599282718256406e-05, "loss": 0.0579, "step": 9093, "task_loss": 0.09801323711872101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018389012664556503, "epoch": 8.64, "learning_rate": 4.3592161070747233e-05, "loss": 0.017, "step": 9094, "task_loss": 0.004471609368920326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03994788974523544, "epoch": 8.64, "learning_rate": 4.358503604588247e-05, "loss": 0.054, "step": 9095, "task_loss": 0.1802259087562561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0729394480586052, "epoch": 8.64, "learning_rate": 4.357790764495639e-05, "loss": 0.0712, "step": 9096, "task_loss": 0.055580612272024155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05639396235346794, "epoch": 8.64, "learning_rate": 4.357077586926392e-05, "loss": 0.065, "step": 9097, "task_loss": 0.1426202952861786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06405860185623169, "epoch": 8.64, "learning_rate": 4.356364072010059e-05, "loss": 0.0699, "step": 9098, "task_loss": 0.1220637708902359 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0317995660007, "epoch": 8.64, "learning_rate": 4.3556502198762496e-05, "loss": 0.0292, "step": 9099, "task_loss": 0.006273902952671051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11453507095575333, "epoch": 8.64, "learning_rate": 4.354936030654642e-05, "loss": 0.1091, "step": 9100, "task_loss": 0.060647960752248764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020458191633224487, "epoch": 8.64, "learning_rate": 4.3542215044749705e-05, "loss": 0.0258, "step": 9101, "task_loss": 0.07395413517951965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04494299367070198, "epoch": 8.64, "learning_rate": 4.3535066414670336e-05, "loss": 0.0484, "step": 9102, "task_loss": 0.0797007754445076 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04942406713962555, "epoch": 8.64, "learning_rate": 4.352791441760687e-05, "loss": 0.0658, "step": 9103, "task_loss": 0.21325786411762238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07142602652311325, "epoch": 8.65, "learning_rate": 4.352075905485854e-05, "loss": 0.0679, "step": 9104, "task_loss": 0.03596463054418564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03519432991743088, "epoch": 8.65, "learning_rate": 4.351360032772512e-05, "loss": 0.0375, "step": 9105, "task_loss": 0.05811901390552521 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09797616302967072, "epoch": 8.65, "learning_rate": 4.3506438237507033e-05, "loss": 0.0951, "step": 9106, "task_loss": 0.06876572221517563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0369865708053112, "epoch": 8.65, "learning_rate": 4.3499272785505316e-05, "loss": 0.0636, "step": 9107, "task_loss": 0.3026273250579834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05089612305164337, "epoch": 8.65, "learning_rate": 4.349210397302161e-05, "loss": 0.0513, "step": 9108, "task_loss": 0.05530446022748947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019257325679063797, "epoch": 8.65, "learning_rate": 4.348493180135815e-05, "loss": 0.0181, "step": 9109, "task_loss": 0.007393643260002136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01597008667886257, "epoch": 8.65, "learning_rate": 4.347775627181782e-05, "loss": 0.0151, "step": 9110, "task_loss": 0.006781516596674919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05022025108337402, "epoch": 8.65, "learning_rate": 4.3470577385704056e-05, "loss": 0.0523, "step": 9111, "task_loss": 0.07132594287395477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07263205945491791, "epoch": 8.65, "learning_rate": 4.346339514432096e-05, "loss": 0.0749, "step": 9112, "task_loss": 0.09521390497684479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03168325126171112, "epoch": 8.65, "learning_rate": 4.345620954897322e-05, "loss": 0.0433, "step": 9113, "task_loss": 0.1476568728685379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06712833046913147, "epoch": 8.66, "learning_rate": 4.344902060096612e-05, "loss": 0.0705, "step": 9114, "task_loss": 0.10096494853496552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04527723044157028, "epoch": 8.66, "learning_rate": 4.344182830160558e-05, "loss": 0.0446, "step": 9115, "task_loss": 0.03821223974227905 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035746797919273376, "epoch": 8.66, "learning_rate": 4.343463265219811e-05, "loss": 0.0337, "step": 9116, "task_loss": 0.014950959011912346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0270676426589489, "epoch": 8.66, "learning_rate": 4.342743365405084e-05, "loss": 0.0262, "step": 9117, "task_loss": 0.018197346478700638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05599695444107056, "epoch": 8.66, "learning_rate": 4.3420231308471496e-05, "loss": 0.0586, "step": 9118, "task_loss": 0.08177006244659424 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02762037329375744, "epoch": 8.66, "learning_rate": 4.3413025616768424e-05, "loss": 0.0306, "step": 9119, "task_loss": 0.057342953979969025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05162864178419113, "epoch": 8.66, "learning_rate": 4.340581658025058e-05, "loss": 0.0553, "step": 9120, "task_loss": 0.08785432577133179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029456917196512222, "epoch": 8.66, "learning_rate": 4.33986042002275e-05, "loss": 0.0385, "step": 9121, "task_loss": 0.11994407325983047 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0898047387599945, "epoch": 8.66, "learning_rate": 4.339138847800936e-05, "loss": 0.105, "step": 9122, "task_loss": 0.24178752303123474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09941445291042328, "epoch": 8.66, "learning_rate": 4.3384169414906925e-05, "loss": 0.107, "step": 9123, "task_loss": 0.1748739629983902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01261752750724554, "epoch": 8.66, "learning_rate": 4.3376947012231586e-05, "loss": 0.0193, "step": 9124, "task_loss": 0.07987774163484573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02077942155301571, "epoch": 8.67, "learning_rate": 4.336972127129532e-05, "loss": 0.0293, "step": 9125, "task_loss": 0.10643217712640762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16735947132110596, "epoch": 8.67, "learning_rate": 4.3362492193410705e-05, "loss": 0.1573, "step": 9126, "task_loss": 0.06678342819213867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030655119568109512, "epoch": 8.67, "learning_rate": 4.335525977989095e-05, "loss": 0.0421, "step": 9127, "task_loss": 0.14501135051250458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0633954331278801, "epoch": 8.67, "learning_rate": 4.334802403204986e-05, "loss": 0.0723, "step": 9128, "task_loss": 0.15235358476638794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021779123693704605, "epoch": 8.67, "learning_rate": 4.334078495120184e-05, "loss": 0.0288, "step": 9129, "task_loss": 0.09162303805351257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11775536835193634, "epoch": 8.67, "learning_rate": 4.33335425386619e-05, "loss": 0.1184, "step": 9130, "task_loss": 0.1240459531545639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1136549636721611, "epoch": 8.67, "learning_rate": 4.332629679574566e-05, "loss": 0.1147, "step": 9131, "task_loss": 0.12449486553668976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0809292122721672, "epoch": 8.67, "learning_rate": 4.331904772376935e-05, "loss": 0.0773, "step": 9132, "task_loss": 0.04456901177763939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02589108981192112, "epoch": 8.67, "learning_rate": 4.3311795324049795e-05, "loss": 0.024, "step": 9133, "task_loss": 0.006641261279582977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027674835175275803, "epoch": 8.67, "learning_rate": 4.3304539597904435e-05, "loss": 0.0262, "step": 9134, "task_loss": 0.013046126812696457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024105960503220558, "epoch": 8.68, "learning_rate": 4.3297280546651295e-05, "loss": 0.0225, "step": 9135, "task_loss": 0.007911447435617447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03654143214225769, "epoch": 8.68, "learning_rate": 4.329001817160903e-05, "loss": 0.0334, "step": 9136, "task_loss": 0.0051150210201740265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07622884958982468, "epoch": 8.68, "learning_rate": 4.3282752474096864e-05, "loss": 0.0791, "step": 9137, "task_loss": 0.10472090542316437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03936569392681122, "epoch": 8.68, "learning_rate": 4.327548345543467e-05, "loss": 0.0384, "step": 9138, "task_loss": 0.029306685552001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015473714098334312, "epoch": 8.68, "learning_rate": 4.326821111694289e-05, "loss": 0.0146, "step": 9139, "task_loss": 0.00628245621919632 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025059562176465988, "epoch": 8.68, "learning_rate": 4.3260935459942584e-05, "loss": 0.0311, "step": 9140, "task_loss": 0.08575528860092163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0825490728020668, "epoch": 8.68, "learning_rate": 4.32536564857554e-05, "loss": 0.0862, "step": 9141, "task_loss": 0.11920452117919922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03180139511823654, "epoch": 8.68, "learning_rate": 4.3246374195703604e-05, "loss": 0.0335, "step": 9142, "task_loss": 0.04881680756807327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06249899044632912, "epoch": 8.68, "learning_rate": 4.3239088591110065e-05, "loss": 0.0643, "step": 9143, "task_loss": 0.08072178065776825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06553536653518677, "epoch": 8.68, "learning_rate": 4.323179967329824e-05, "loss": 0.0638, "step": 9144, "task_loss": 0.048590727150440216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01741437427699566, "epoch": 8.68, "learning_rate": 4.3224507443592196e-05, "loss": 0.0162, "step": 9145, "task_loss": 0.005251972004771233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15998907387256622, "epoch": 8.69, "learning_rate": 4.321721190331661e-05, "loss": 0.1598, "step": 9146, "task_loss": 0.15769526362419128 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041096534579992294, "epoch": 8.69, "learning_rate": 4.3209913053796746e-05, "loss": 0.0477, "step": 9147, "task_loss": 0.10717198252677917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06216401606798172, "epoch": 8.69, "learning_rate": 4.3202610896358474e-05, "loss": 0.0581, "step": 9148, "task_loss": 0.02124890312552452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02210739627480507, "epoch": 8.69, "learning_rate": 4.319530543232827e-05, "loss": 0.0205, "step": 9149, "task_loss": 0.006114525720477104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.049875520169734955, "epoch": 8.69, "learning_rate": 4.31879966630332e-05, "loss": 0.0462, "step": 9150, "task_loss": 0.012633267790079117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0812768042087555, "epoch": 8.69, "learning_rate": 4.318068458980095e-05, "loss": 0.089, "step": 9151, "task_loss": 0.15828600525856018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03615918010473251, "epoch": 8.69, "learning_rate": 4.317336921395978e-05, "loss": 0.0358, "step": 9152, "task_loss": 0.03290475159883499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01615232229232788, "epoch": 8.69, "learning_rate": 4.316605053683856e-05, "loss": 0.0159, "step": 9153, "task_loss": 0.013984838500618935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12520606815814972, "epoch": 8.69, "learning_rate": 4.3158728559766786e-05, "loss": 0.1234, "step": 9154, "task_loss": 0.10756815969944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021900830790400505, "epoch": 8.69, "learning_rate": 4.315140328407451e-05, "loss": 0.0242, "step": 9155, "task_loss": 0.044664304703474045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0630156397819519, "epoch": 8.7, "learning_rate": 4.314407471109241e-05, "loss": 0.0814, "step": 9156, "task_loss": 0.2466500848531723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038144879043102264, "epoch": 8.7, "learning_rate": 4.313674284215176e-05, "loss": 0.0364, "step": 9157, "task_loss": 0.020709164440631866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09585539251565933, "epoch": 8.7, "learning_rate": 4.312940767858441e-05, "loss": 0.0947, "step": 9158, "task_loss": 0.08445055782794952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021123457700014114, "epoch": 8.7, "learning_rate": 4.312206922172286e-05, "loss": 0.0196, "step": 9159, "task_loss": 0.005899334326386452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026478923857212067, "epoch": 8.7, "learning_rate": 4.311472747290015e-05, "loss": 0.0246, "step": 9160, "task_loss": 0.007890569046139717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.048453450202941895, "epoch": 8.7, "learning_rate": 4.310738243344996e-05, "loss": 0.0517, "step": 9161, "task_loss": 0.0805249959230423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.040751051157712936, "epoch": 8.7, "learning_rate": 4.310003410470653e-05, "loss": 0.0386, "step": 9162, "task_loss": 0.018803205341100693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04210975766181946, "epoch": 8.7, "learning_rate": 4.309268248800476e-05, "loss": 0.0473, "step": 9163, "task_loss": 0.09379490464925766 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0314488485455513, "epoch": 8.7, "learning_rate": 4.3085327584680056e-05, "loss": 0.0393, "step": 9164, "task_loss": 0.10950774699449539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023545613512396812, "epoch": 8.7, "learning_rate": 4.3077969396068505e-05, "loss": 0.0339, "step": 9165, "task_loss": 0.12670263648033142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026402778923511505, "epoch": 8.7, "learning_rate": 4.307060792350675e-05, "loss": 0.0296, "step": 9166, "task_loss": 0.057920970022678375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022520121186971664, "epoch": 8.71, "learning_rate": 4.306324316833203e-05, "loss": 0.0211, "step": 9167, "task_loss": 0.008033214136958122 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02548299916088581, "epoch": 8.71, "learning_rate": 4.3055875131882204e-05, "loss": 0.0301, "step": 9168, "task_loss": 0.071867436170578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11392084509134293, "epoch": 8.71, "learning_rate": 4.30485038154957e-05, "loss": 0.1113, "step": 9169, "task_loss": 0.08807753771543503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013116149231791496, "epoch": 8.71, "learning_rate": 4.304112922051155e-05, "loss": 0.0123, "step": 9170, "task_loss": 0.005404811352491379 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09974595159292221, "epoch": 8.71, "learning_rate": 4.30337513482694e-05, "loss": 0.0927, "step": 9171, "task_loss": 0.028922712430357933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017070695757865906, "epoch": 8.71, "learning_rate": 4.3026370200109463e-05, "loss": 0.0241, "step": 9172, "task_loss": 0.08706965297460556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04366880655288696, "epoch": 8.71, "learning_rate": 4.301898577737255e-05, "loss": 0.0453, "step": 9173, "task_loss": 0.06001284718513489 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021489912644028664, "epoch": 8.71, "learning_rate": 4.3011598081400105e-05, "loss": 0.0281, "step": 9174, "task_loss": 0.08786029368638992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02259124629199505, "epoch": 8.71, "learning_rate": 4.3004207113534124e-05, "loss": 0.0246, "step": 9175, "task_loss": 0.04274073615670204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07785982638597488, "epoch": 8.71, "learning_rate": 4.2996812875117206e-05, "loss": 0.0815, "step": 9176, "task_loss": 0.114007368683815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02123086154460907, "epoch": 8.72, "learning_rate": 4.2989415367492556e-05, "loss": 0.02, "step": 9177, "task_loss": 0.008761711418628693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020061947405338287, "epoch": 8.72, "learning_rate": 4.298201459200397e-05, "loss": 0.0188, "step": 9178, "task_loss": 0.007334306836128235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04284045845270157, "epoch": 8.72, "learning_rate": 4.2974610549995834e-05, "loss": 0.041, "step": 9179, "task_loss": 0.024930864572525024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05956036597490311, "epoch": 8.72, "learning_rate": 4.296720324281311e-05, "loss": 0.0714, "step": 9180, "task_loss": 0.17790013551712036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035681623965501785, "epoch": 8.72, "learning_rate": 4.29597926718014e-05, "loss": 0.0331, "step": 9181, "task_loss": 0.009932447224855423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03028404340147972, "epoch": 8.72, "learning_rate": 4.295237883830685e-05, "loss": 0.0336, "step": 9182, "task_loss": 0.06349918246269226 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.056420549750328064, "epoch": 8.72, "learning_rate": 4.294496174367623e-05, "loss": 0.0584, "step": 9183, "task_loss": 0.0761711597442627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08292526006698608, "epoch": 8.72, "learning_rate": 4.2937541389256877e-05, "loss": 0.0884, "step": 9184, "task_loss": 0.13766731321811676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013823905028402805, "epoch": 8.72, "learning_rate": 4.293011777639675e-05, "loss": 0.0128, "step": 9185, "task_loss": 0.0035786759108304977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04730577394366264, "epoch": 8.72, "learning_rate": 4.2922690906444374e-05, "loss": 0.0534, "step": 9186, "task_loss": 0.1083056777715683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07880745083093643, "epoch": 8.72, "learning_rate": 4.291526078074888e-05, "loss": 0.0767, "step": 9187, "task_loss": 0.05808747559785843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027023762464523315, "epoch": 8.73, "learning_rate": 4.290782740065997e-05, "loss": 0.0305, "step": 9188, "task_loss": 0.061429526656866074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06073024123907089, "epoch": 8.73, "learning_rate": 4.290039076752799e-05, "loss": 0.0719, "step": 9189, "task_loss": 0.172482430934906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015384482219815254, "epoch": 8.73, "learning_rate": 4.28929508827038e-05, "loss": 0.0144, "step": 9190, "task_loss": 0.00527518056333065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01384667120873928, "epoch": 8.73, "learning_rate": 4.288550774753892e-05, "loss": 0.0179, "step": 9191, "task_loss": 0.054292093962430954 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03736741095781326, "epoch": 8.73, "learning_rate": 4.2878061363385414e-05, "loss": 0.0363, "step": 9192, "task_loss": 0.02659483253955841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025986768305301666, "epoch": 8.73, "learning_rate": 4.287061173159597e-05, "loss": 0.0431, "step": 9193, "task_loss": 0.19738051295280457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02209838107228279, "epoch": 8.73, "learning_rate": 4.286315885352382e-05, "loss": 0.03, "step": 9194, "task_loss": 0.10124228149652481 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024954846128821373, "epoch": 8.73, "learning_rate": 4.285570273052285e-05, "loss": 0.0387, "step": 9195, "task_loss": 0.1625334769487381 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025567319244146347, "epoch": 8.73, "learning_rate": 4.2848243363947484e-05, "loss": 0.0244, "step": 9196, "task_loss": 0.014310002326965332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0735805556178093, "epoch": 8.73, "learning_rate": 4.2840780755152746e-05, "loss": 0.0714, "step": 9197, "task_loss": 0.0521991103887558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0836353451013565, "epoch": 8.74, "learning_rate": 4.283331490549426e-05, "loss": 0.0817, "step": 9198, "task_loss": 0.06436392664909363 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04380929470062256, "epoch": 8.74, "learning_rate": 4.282584581632824e-05, "loss": 0.0602, "step": 9199, "task_loss": 0.20809507369995117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12274999171495438, "epoch": 8.74, "learning_rate": 4.281837348901148e-05, "loss": 0.126, "step": 9200, "task_loss": 0.1554453819990158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057108841836452484, "epoch": 8.74, "learning_rate": 4.281089792490136e-05, "loss": 0.0531, "step": 9201, "task_loss": 0.016610583290457726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03790339082479477, "epoch": 8.74, "learning_rate": 4.280341912535585e-05, "loss": 0.0477, "step": 9202, "task_loss": 0.1361059844493866 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06707513332366943, "epoch": 8.74, "learning_rate": 4.2795937091733515e-05, "loss": 0.076, "step": 9203, "task_loss": 0.1563674360513687 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06679324805736542, "epoch": 8.74, "learning_rate": 4.27884518253935e-05, "loss": 0.0657, "step": 9204, "task_loss": 0.056202180683612823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04162095487117767, "epoch": 8.74, "learning_rate": 4.278096332769555e-05, "loss": 0.0453, "step": 9205, "task_loss": 0.07872645556926727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028269974514842033, "epoch": 8.74, "learning_rate": 4.277347159999997e-05, "loss": 0.0262, "step": 9206, "task_loss": 0.00805065967142582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1113031655550003, "epoch": 8.74, "learning_rate": 4.276597664366767e-05, "loss": 0.1132, "step": 9207, "task_loss": 0.1301630735397339 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028668878600001335, "epoch": 8.74, "learning_rate": 4.2758478460060166e-05, "loss": 0.0301, "step": 9208, "task_loss": 0.04257269576191902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03439326956868172, "epoch": 8.75, "learning_rate": 4.275097705053951e-05, "loss": 0.0324, "step": 9209, "task_loss": 0.014087924733757973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02624572440981865, "epoch": 8.75, "learning_rate": 4.2743472416468385e-05, "loss": 0.0285, "step": 9210, "task_loss": 0.048966288566589355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013097495771944523, "epoch": 8.75, "learning_rate": 4.2735964559210054e-05, "loss": 0.0122, "step": 9211, "task_loss": 0.004471452906727791 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08438113331794739, "epoch": 8.75, "learning_rate": 4.272845348012833e-05, "loss": 0.091, "step": 9212, "task_loss": 0.1502004712820053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015331685543060303, "epoch": 8.75, "learning_rate": 4.272093918058766e-05, "loss": 0.0143, "step": 9213, "task_loss": 0.004537465050816536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019342856481671333, "epoch": 8.75, "learning_rate": 4.271342166195304e-05, "loss": 0.0346, "step": 9214, "task_loss": 0.17235919833183289 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023674393072724342, "epoch": 8.75, "learning_rate": 4.2705900925590056e-05, "loss": 0.0295, "step": 9215, "task_loss": 0.08143065869808197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013667159713804722, "epoch": 8.75, "learning_rate": 4.269837697286491e-05, "loss": 0.0127, "step": 9216, "task_loss": 0.0043479762971401215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03081374801695347, "epoch": 8.75, "learning_rate": 4.269084980514434e-05, "loss": 0.0283, "step": 9217, "task_loss": 0.0054970309138298035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019850486889481544, "epoch": 8.75, "learning_rate": 4.268331942379571e-05, "loss": 0.0197, "step": 9218, "task_loss": 0.018215559422969818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.053914956748485565, "epoch": 8.75, "learning_rate": 4.267578583018694e-05, "loss": 0.0562, "step": 9219, "task_loss": 0.07634520530700684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0433831587433815, "epoch": 8.76, "learning_rate": 4.2668249025686545e-05, "loss": 0.0427, "step": 9220, "task_loss": 0.03700166195631027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09998966753482819, "epoch": 8.76, "learning_rate": 4.2660709011663624e-05, "loss": 0.1053, "step": 9221, "task_loss": 0.1534278392791748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17341908812522888, "epoch": 8.76, "learning_rate": 4.2653165789487864e-05, "loss": 0.175, "step": 9222, "task_loss": 0.189271941781044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021686669439077377, "epoch": 8.76, "learning_rate": 4.2645619360529514e-05, "loss": 0.0204, "step": 9223, "task_loss": 0.009309127926826477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030826609581708908, "epoch": 8.76, "learning_rate": 4.2638069726159424e-05, "loss": 0.0394, "step": 9224, "task_loss": 0.11687062680721283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01709115505218506, "epoch": 8.76, "learning_rate": 4.263051688774902e-05, "loss": 0.0219, "step": 9225, "task_loss": 0.06518833339214325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03491377830505371, "epoch": 8.76, "learning_rate": 4.262296084667032e-05, "loss": 0.0541, "step": 9226, "task_loss": 0.22655193507671356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0234515480697155, "epoch": 8.76, "learning_rate": 4.2615401604295905e-05, "loss": 0.0265, "step": 9227, "task_loss": 0.0536535307765007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09627413749694824, "epoch": 8.76, "learning_rate": 4.260783916199895e-05, "loss": 0.0981, "step": 9228, "task_loss": 0.11438637971878052 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.065280482172966, "epoch": 8.76, "learning_rate": 4.260027352115321e-05, "loss": 0.0611, "step": 9229, "task_loss": 0.02382420189678669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04924977943301201, "epoch": 8.77, "learning_rate": 4.2592704683133035e-05, "loss": 0.0536, "step": 9230, "task_loss": 0.09264830499887466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02826646901667118, "epoch": 8.77, "learning_rate": 4.258513264931331e-05, "loss": 0.0412, "step": 9231, "task_loss": 0.1579834669828415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018645115196704865, "epoch": 8.77, "learning_rate": 4.257755742106956e-05, "loss": 0.0172, "step": 9232, "task_loss": 0.0041794683784246445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012779043056070805, "epoch": 8.77, "learning_rate": 4.256997899977784e-05, "loss": 0.012, "step": 9233, "task_loss": 0.004724707454442978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03663068264722824, "epoch": 8.77, "learning_rate": 4.2562397386814823e-05, "loss": 0.042, "step": 9234, "task_loss": 0.09023445844650269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018597885966300964, "epoch": 8.77, "learning_rate": 4.255481258355773e-05, "loss": 0.0174, "step": 9235, "task_loss": 0.006651686504483223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1508217453956604, "epoch": 8.77, "learning_rate": 4.254722459138441e-05, "loss": 0.1488, "step": 9236, "task_loss": 0.1301749348640442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04240924119949341, "epoch": 8.77, "learning_rate": 4.253963341167321e-05, "loss": 0.0484, "step": 9237, "task_loss": 0.10239571332931519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034557685256004333, "epoch": 8.77, "learning_rate": 4.253203904580314e-05, "loss": 0.0318, "step": 9238, "task_loss": 0.007468333467841148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007893304340541363, "epoch": 8.77, "learning_rate": 4.252444149515374e-05, "loss": 0.0165, "step": 9239, "task_loss": 0.09393760561943054 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03578212484717369, "epoch": 8.77, "learning_rate": 4.251684076110514e-05, "loss": 0.0509, "step": 9240, "task_loss": 0.1870957762002945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024300862103700638, "epoch": 8.78, "learning_rate": 4.250923684503806e-05, "loss": 0.0296, "step": 9241, "task_loss": 0.07725972682237625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14117558300495148, "epoch": 8.78, "learning_rate": 4.2501629748333774e-05, "loss": 0.1339, "step": 9242, "task_loss": 0.06846613436937332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10145477950572968, "epoch": 8.78, "learning_rate": 4.249401947237417e-05, "loss": 0.0969, "step": 9243, "task_loss": 0.056025002151727676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059924256056547165, "epoch": 8.78, "learning_rate": 4.248640601854166e-05, "loss": 0.0585, "step": 9244, "task_loss": 0.045406997203826904 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08220744132995605, "epoch": 8.78, "learning_rate": 4.247878938821929e-05, "loss": 0.0747, "step": 9245, "task_loss": 0.007002789527177811 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14508725702762604, "epoch": 8.78, "learning_rate": 4.247116958279065e-05, "loss": 0.1431, "step": 9246, "task_loss": 0.1252875030040741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03380056470632553, "epoch": 8.78, "learning_rate": 4.246354660363991e-05, "loss": 0.0311, "step": 9247, "task_loss": 0.007123725488781929 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03263848274946213, "epoch": 8.78, "learning_rate": 4.245592045215182e-05, "loss": 0.0461, "step": 9248, "task_loss": 0.16699370741844177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04700472578406334, "epoch": 8.78, "learning_rate": 4.244829112971172e-05, "loss": 0.0644, "step": 9249, "task_loss": 0.22047923505306244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13564848899841309, "epoch": 8.78, "learning_rate": 4.24406586377055e-05, "loss": 0.1365, "step": 9250, "task_loss": 0.14452001452445984 }, { "epoch": 8.78, "eval_accuracy": 0.8876146788990825, "eval_loss": 0.4403258264064789, "eval_runtime": 18.1936, "eval_samples_per_second": 47.929, "eval_steps_per_second": 5.991, "step": 9250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11035147309303284, "epoch": 8.79, "learning_rate": 4.2433022977519645e-05, "loss": 0.1127, "step": 9251, "task_loss": 0.13335135579109192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1526888608932495, "epoch": 8.79, "learning_rate": 4.2425384150541206e-05, "loss": 0.148, "step": 9252, "task_loss": 0.10532040894031525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07202281802892685, "epoch": 8.79, "learning_rate": 4.2417742158157816e-05, "loss": 0.0773, "step": 9253, "task_loss": 0.12501731514930725 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0764976292848587, "epoch": 8.79, "learning_rate": 4.2410097001757676e-05, "loss": 0.0794, "step": 9254, "task_loss": 0.10542768239974976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01943717896938324, "epoch": 8.79, "learning_rate": 4.2402448682729566e-05, "loss": 0.0181, "step": 9255, "task_loss": 0.005980361253023148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06404136121273041, "epoch": 8.79, "learning_rate": 4.2394797202462844e-05, "loss": 0.0716, "step": 9256, "task_loss": 0.13929347693920135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0188496895134449, "epoch": 8.79, "learning_rate": 4.238714256234744e-05, "loss": 0.018, "step": 9257, "task_loss": 0.010533113032579422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021757036447525024, "epoch": 8.79, "learning_rate": 4.237948476377385e-05, "loss": 0.0224, "step": 9258, "task_loss": 0.028252597898244858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038637999445199966, "epoch": 8.79, "learning_rate": 4.237182380813315e-05, "loss": 0.0356, "step": 9259, "task_loss": 0.008690120652318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03595548868179321, "epoch": 8.79, "learning_rate": 4.236415969681699e-05, "loss": 0.0449, "step": 9260, "task_loss": 0.12517251074314117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014416642487049103, "epoch": 8.79, "learning_rate": 4.23564924312176e-05, "loss": 0.0177, "step": 9261, "task_loss": 0.047183021903038025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08503177762031555, "epoch": 8.8, "learning_rate": 4.2348822012727765e-05, "loss": 0.0902, "step": 9262, "task_loss": 0.13704237341880798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.060867466032505035, "epoch": 8.8, "learning_rate": 4.234114844274086e-05, "loss": 0.0737, "step": 9263, "task_loss": 0.1892949640750885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04127098247408867, "epoch": 8.8, "learning_rate": 4.2333471722650826e-05, "loss": 0.0375, "step": 9264, "task_loss": 0.0030923504382371902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02909199520945549, "epoch": 8.8, "learning_rate": 4.232579185385217e-05, "loss": 0.0267, "step": 9265, "task_loss": 0.004700042307376862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13650727272033691, "epoch": 8.8, "learning_rate": 4.231810883773999e-05, "loss": 0.1356, "step": 9266, "task_loss": 0.1277703493833542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07909749448299408, "epoch": 8.8, "learning_rate": 4.231042267570993e-05, "loss": 0.0816, "step": 9267, "task_loss": 0.10437445342540741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03750952333211899, "epoch": 8.8, "learning_rate": 4.230273336915822e-05, "loss": 0.0398, "step": 9268, "task_loss": 0.06028630957007408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02200540341436863, "epoch": 8.8, "learning_rate": 4.2295040919481664e-05, "loss": 0.0203, "step": 9269, "task_loss": 0.005379866808652878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1982060968875885, "epoch": 8.8, "learning_rate": 4.228734532807763e-05, "loss": 0.19, "step": 9270, "task_loss": 0.11627163738012314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03145551681518555, "epoch": 8.8, "learning_rate": 4.2279646596344067e-05, "loss": 0.0289, "step": 9271, "task_loss": 0.006189312785863876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02007415145635605, "epoch": 8.81, "learning_rate": 4.227194472567948e-05, "loss": 0.0271, "step": 9272, "task_loss": 0.09050026535987854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013911131769418716, "epoch": 8.81, "learning_rate": 4.2264239717482945e-05, "loss": 0.0129, "step": 9273, "task_loss": 0.004193015396595001 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0349629707634449, "epoch": 8.81, "learning_rate": 4.225653157315412e-05, "loss": 0.0387, "step": 9274, "task_loss": 0.07233209162950516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11082993447780609, "epoch": 8.81, "learning_rate": 4.224882029409323e-05, "loss": 0.1144, "step": 9275, "task_loss": 0.14643532037734985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014289605431258678, "epoch": 8.81, "learning_rate": 4.224110588170106e-05, "loss": 0.0134, "step": 9276, "task_loss": 0.005692243576049805 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0677342414855957, "epoch": 8.81, "learning_rate": 4.223338833737898e-05, "loss": 0.0682, "step": 9277, "task_loss": 0.07213495671749115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0438992902636528, "epoch": 8.81, "learning_rate": 4.22256676625289e-05, "loss": 0.0473, "step": 9278, "task_loss": 0.07815498113632202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13519689440727234, "epoch": 8.81, "learning_rate": 4.221794385855334e-05, "loss": 0.1365, "step": 9279, "task_loss": 0.14803387224674225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017305288463830948, "epoch": 8.81, "learning_rate": 4.221021692685534e-05, "loss": 0.0161, "step": 9280, "task_loss": 0.005480002611875534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036162495613098145, "epoch": 8.81, "learning_rate": 4.220248686883857e-05, "loss": 0.0348, "step": 9281, "task_loss": 0.022239340469241142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02372421696782112, "epoch": 8.81, "learning_rate": 4.21947536859072e-05, "loss": 0.0221, "step": 9282, "task_loss": 0.007254859432578087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.040189243853092194, "epoch": 8.82, "learning_rate": 4.218701737946601e-05, "loss": 0.0424, "step": 9283, "task_loss": 0.062415711581707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03214951977133751, "epoch": 8.82, "learning_rate": 4.217927795092034e-05, "loss": 0.0379, "step": 9284, "task_loss": 0.08962158858776093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041563909500837326, "epoch": 8.82, "learning_rate": 4.21715354016761e-05, "loss": 0.0388, "step": 9285, "task_loss": 0.01408584788441658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05513744056224823, "epoch": 8.82, "learning_rate": 4.216378973313976e-05, "loss": 0.0519, "step": 9286, "task_loss": 0.02227054536342621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03985002636909485, "epoch": 8.82, "learning_rate": 4.215604094671835e-05, "loss": 0.0513, "step": 9287, "task_loss": 0.15437698364257812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.055905863642692566, "epoch": 8.82, "learning_rate": 4.214828904381947e-05, "loss": 0.0522, "step": 9288, "task_loss": 0.01845194399356842 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.049979522824287415, "epoch": 8.82, "learning_rate": 4.21405340258513e-05, "loss": 0.054, "step": 9289, "task_loss": 0.09055973589420319 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04201385751366615, "epoch": 8.82, "learning_rate": 4.213277589422258e-05, "loss": 0.0532, "step": 9290, "task_loss": 0.1543242484331131 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10822063684463501, "epoch": 8.82, "learning_rate": 4.21250146503426e-05, "loss": 0.106, "step": 9291, "task_loss": 0.08604384958744049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02375629171729088, "epoch": 8.82, "learning_rate": 4.2117250295621235e-05, "loss": 0.0341, "step": 9292, "task_loss": 0.1269487887620926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028724079951643944, "epoch": 8.83, "learning_rate": 4.210948283146892e-05, "loss": 0.027, "step": 9293, "task_loss": 0.011627469211816788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023904111236333847, "epoch": 8.83, "learning_rate": 4.210171225929664e-05, "loss": 0.022, "step": 9294, "task_loss": 0.004593405872583389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030907439067959785, "epoch": 8.83, "learning_rate": 4.209393858051598e-05, "loss": 0.0352, "step": 9295, "task_loss": 0.0739751011133194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04190927371382713, "epoch": 8.83, "learning_rate": 4.208616179653903e-05, "loss": 0.0506, "step": 9296, "task_loss": 0.12926626205444336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011166459880769253, "epoch": 8.83, "learning_rate": 4.207838190877852e-05, "loss": 0.0194, "step": 9297, "task_loss": 0.09343065321445465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07796520739793777, "epoch": 8.83, "learning_rate": 4.2070598918647683e-05, "loss": 0.0892, "step": 9298, "task_loss": 0.19046609103679657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13977394998073578, "epoch": 8.83, "learning_rate": 4.206281282756034e-05, "loss": 0.137, "step": 9299, "task_loss": 0.1115388497710228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01926058530807495, "epoch": 8.83, "learning_rate": 4.205502363693087e-05, "loss": 0.0178, "step": 9300, "task_loss": 0.004766935482621193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014616726897656918, "epoch": 8.83, "learning_rate": 4.204723134817422e-05, "loss": 0.0137, "step": 9301, "task_loss": 0.005341559648513794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031305767595767975, "epoch": 8.83, "learning_rate": 4.2039435962705886e-05, "loss": 0.0336, "step": 9302, "task_loss": 0.054229650646448135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06339511275291443, "epoch": 8.83, "learning_rate": 4.2031637481941954e-05, "loss": 0.0737, "step": 9303, "task_loss": 0.1668478399515152 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03651703521609306, "epoch": 8.84, "learning_rate": 4.202383590729905e-05, "loss": 0.0422, "step": 9304, "task_loss": 0.09357205033302307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03856692090630531, "epoch": 8.84, "learning_rate": 4.201603124019436e-05, "loss": 0.0393, "step": 9305, "task_loss": 0.0458366759121418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022785726934671402, "epoch": 8.84, "learning_rate": 4.200822348204565e-05, "loss": 0.0277, "step": 9306, "task_loss": 0.07169666886329651 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.054608479142189026, "epoch": 8.84, "learning_rate": 4.200041263427123e-05, "loss": 0.0635, "step": 9307, "task_loss": 0.14374345541000366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0937652736902237, "epoch": 8.84, "learning_rate": 4.199259869828998e-05, "loss": 0.0977, "step": 9308, "task_loss": 0.13320079445838928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0214654803276062, "epoch": 8.84, "learning_rate": 4.1984781675521345e-05, "loss": 0.0285, "step": 9309, "task_loss": 0.09145978093147278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02630656771361828, "epoch": 8.84, "learning_rate": 4.1976961567385306e-05, "loss": 0.0298, "step": 9310, "task_loss": 0.06128865107893944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029763180762529373, "epoch": 8.84, "learning_rate": 4.1969138375302445e-05, "loss": 0.0275, "step": 9311, "task_loss": 0.007424212992191315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013515127822756767, "epoch": 8.84, "learning_rate": 4.1961312100693874e-05, "loss": 0.0312, "step": 9312, "task_loss": 0.19070707261562347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009992139413952827, "epoch": 8.84, "learning_rate": 4.1953482744981274e-05, "loss": 0.0093, "step": 9313, "task_loss": 0.0031170137226581573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08150660246610641, "epoch": 8.85, "learning_rate": 4.194565030958688e-05, "loss": 0.0909, "step": 9314, "task_loss": 0.17554497718811035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01873527467250824, "epoch": 8.85, "learning_rate": 4.19378147959335e-05, "loss": 0.0211, "step": 9315, "task_loss": 0.04287352040410042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16984689235687256, "epoch": 8.85, "learning_rate": 4.192997620544449e-05, "loss": 0.1605, "step": 9316, "task_loss": 0.0767621323466301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06193321570754051, "epoch": 8.85, "learning_rate": 4.192213453954377e-05, "loss": 0.0631, "step": 9317, "task_loss": 0.07395564019680023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01895410194993019, "epoch": 8.85, "learning_rate": 4.19142897996558e-05, "loss": 0.0228, "step": 9318, "task_loss": 0.05743853747844696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09764152765274048, "epoch": 8.85, "learning_rate": 4.190644198720563e-05, "loss": 0.0953, "step": 9319, "task_loss": 0.07411730289459229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02355141192674637, "epoch": 8.85, "learning_rate": 4.189859110361886e-05, "loss": 0.0269, "step": 9320, "task_loss": 0.05675677955150604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.23243659734725952, "epoch": 8.85, "learning_rate": 4.189073715032163e-05, "loss": 0.2339, "step": 9321, "task_loss": 0.24723902344703674 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057536251842975616, "epoch": 8.85, "learning_rate": 4.188288012874065e-05, "loss": 0.0635, "step": 9322, "task_loss": 0.11727714538574219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07297425717115402, "epoch": 8.85, "learning_rate": 4.187502004030318e-05, "loss": 0.0841, "step": 9323, "task_loss": 0.1841675043106079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.19478213787078857, "epoch": 8.85, "learning_rate": 4.186715688643705e-05, "loss": 0.1956, "step": 9324, "task_loss": 0.202731192111969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16516168415546417, "epoch": 8.86, "learning_rate": 4.185929066857064e-05, "loss": 0.1631, "step": 9325, "task_loss": 0.14431238174438477 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.14519473910331726, "epoch": 8.86, "learning_rate": 4.1851421388132886e-05, "loss": 0.1572, "step": 9326, "task_loss": 0.2655932903289795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04612842947244644, "epoch": 8.86, "learning_rate": 4.1843549046553284e-05, "loss": 0.0491, "step": 9327, "task_loss": 0.07564530521631241 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16417290270328522, "epoch": 8.86, "learning_rate": 4.183567364526186e-05, "loss": 0.1742, "step": 9328, "task_loss": 0.26465079188346863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02931727096438408, "epoch": 8.86, "learning_rate": 4.182779518568926e-05, "loss": 0.0414, "step": 9329, "task_loss": 0.1504097282886505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05426057428121567, "epoch": 8.86, "learning_rate": 4.181991366926661e-05, "loss": 0.0512, "step": 9330, "task_loss": 0.02333644963800907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2061796486377716, "epoch": 8.86, "learning_rate": 4.181202909742564e-05, "loss": 0.2011, "step": 9331, "task_loss": 0.15504643321037292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01282942108809948, "epoch": 8.86, "learning_rate": 4.1804141471598604e-05, "loss": 0.026, "step": 9332, "task_loss": 0.14491677284240723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057288218289613724, "epoch": 8.86, "learning_rate": 4.179625079321836e-05, "loss": 0.0536, "step": 9333, "task_loss": 0.020448647439479828 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06341984868049622, "epoch": 8.86, "learning_rate": 4.1788357063718254e-05, "loss": 0.0675, "step": 9334, "task_loss": 0.10468359291553497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05787978321313858, "epoch": 8.87, "learning_rate": 4.178046028453224e-05, "loss": 0.0547, "step": 9335, "task_loss": 0.026474833488464355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05362321063876152, "epoch": 8.87, "learning_rate": 4.1772560457094795e-05, "loss": 0.0721, "step": 9336, "task_loss": 0.23888705670833588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06569310277700424, "epoch": 8.87, "learning_rate": 4.1764657582840965e-05, "loss": 0.07, "step": 9337, "task_loss": 0.10902517288923264 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1104736477136612, "epoch": 8.87, "learning_rate": 4.175675166320635e-05, "loss": 0.1168, "step": 9338, "task_loss": 0.17340587079524994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014345163479447365, "epoch": 8.87, "learning_rate": 4.1748842699627094e-05, "loss": 0.0135, "step": 9339, "task_loss": 0.00562736950814724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03695271164178848, "epoch": 8.87, "learning_rate": 4.17409306935399e-05, "loss": 0.0341, "step": 9340, "task_loss": 0.008127160370349884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01702256128191948, "epoch": 8.87, "learning_rate": 4.173301564638201e-05, "loss": 0.0247, "step": 9341, "task_loss": 0.09353862702846527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01691245101392269, "epoch": 8.87, "learning_rate": 4.1725097559591256e-05, "loss": 0.0158, "step": 9342, "task_loss": 0.005920737981796265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03731689229607582, "epoch": 8.87, "learning_rate": 4.1717176434605967e-05, "loss": 0.0357, "step": 9343, "task_loss": 0.021031979471445084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0925697386264801, "epoch": 8.87, "learning_rate": 4.170925227286508e-05, "loss": 0.1213, "step": 9344, "task_loss": 0.38006648421287537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00923939235508442, "epoch": 8.87, "learning_rate": 4.170132507580803e-05, "loss": 0.0155, "step": 9345, "task_loss": 0.07144822180271149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009474704042077065, "epoch": 8.88, "learning_rate": 4.1693394844874856e-05, "loss": 0.0154, "step": 9346, "task_loss": 0.06834915280342102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0885670930147171, "epoch": 8.88, "learning_rate": 4.1685461581506115e-05, "loss": 0.1049, "step": 9347, "task_loss": 0.2523471713066101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1884203851222992, "epoch": 8.88, "learning_rate": 4.167752528714291e-05, "loss": 0.1863, "step": 9348, "task_loss": 0.16740942001342773 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013086885213851929, "epoch": 8.88, "learning_rate": 4.166958596322692e-05, "loss": 0.0186, "step": 9349, "task_loss": 0.06813879311084747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1107698380947113, "epoch": 8.88, "learning_rate": 4.1661643611200366e-05, "loss": 0.1103, "step": 9350, "task_loss": 0.10653585195541382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03403695300221443, "epoch": 8.88, "learning_rate": 4.1653698232506e-05, "loss": 0.0334, "step": 9351, "task_loss": 0.02738173119723797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016268638893961906, "epoch": 8.88, "learning_rate": 4.1645749828587145e-05, "loss": 0.0161, "step": 9352, "task_loss": 0.014955738559365273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039960239082574844, "epoch": 8.88, "learning_rate": 4.1637798400887674e-05, "loss": 0.0423, "step": 9353, "task_loss": 0.06289532035589218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021891163662075996, "epoch": 8.88, "learning_rate": 4.162984395085198e-05, "loss": 0.0206, "step": 9354, "task_loss": 0.008558722212910652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05967186763882637, "epoch": 8.88, "learning_rate": 4.162188647992506e-05, "loss": 0.0628, "step": 9355, "task_loss": 0.09045903384685516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11606243252754211, "epoch": 8.89, "learning_rate": 4.161392598955239e-05, "loss": 0.1182, "step": 9356, "task_loss": 0.13704703748226166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05215151980519295, "epoch": 8.89, "learning_rate": 4.160596248118007e-05, "loss": 0.066, "step": 9357, "task_loss": 0.19018231332302094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016069339588284492, "epoch": 8.89, "learning_rate": 4.159799595625468e-05, "loss": 0.015, "step": 9358, "task_loss": 0.005058174952864647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016761645674705505, "epoch": 8.89, "learning_rate": 4.159002641622338e-05, "loss": 0.0165, "step": 9359, "task_loss": 0.013712173327803612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03098924085497856, "epoch": 8.89, "learning_rate": 4.1582053862533895e-05, "loss": 0.0378, "step": 9360, "task_loss": 0.0994289442896843 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1274225413799286, "epoch": 8.89, "learning_rate": 4.157407829663446e-05, "loss": 0.1212, "step": 9361, "task_loss": 0.06485594809055328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016975758597254753, "epoch": 8.89, "learning_rate": 4.1566099719973884e-05, "loss": 0.0294, "step": 9362, "task_loss": 0.1415422558784485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13337287306785583, "epoch": 8.89, "learning_rate": 4.1558118134001514e-05, "loss": 0.1266, "step": 9363, "task_loss": 0.06562095880508423 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09345261007547379, "epoch": 8.89, "learning_rate": 4.155013354016723e-05, "loss": 0.0891, "step": 9364, "task_loss": 0.04989039897918701 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06697876751422882, "epoch": 8.89, "learning_rate": 4.154214593992149e-05, "loss": 0.0627, "step": 9365, "task_loss": 0.02389654330909252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15374194085597992, "epoch": 8.89, "learning_rate": 4.1534155334715264e-05, "loss": 0.1484, "step": 9366, "task_loss": 0.1006234660744667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009657006710767746, "epoch": 8.9, "learning_rate": 4.15261617260001e-05, "loss": 0.0192, "step": 9367, "task_loss": 0.10532604157924652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0194817166775465, "epoch": 8.9, "learning_rate": 4.151816511522807e-05, "loss": 0.0183, "step": 9368, "task_loss": 0.0075190383940935135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018920117989182472, "epoch": 8.9, "learning_rate": 4.151016550385179e-05, "loss": 0.0241, "step": 9369, "task_loss": 0.07103104889392853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015537586063146591, "epoch": 8.9, "learning_rate": 4.150216289332443e-05, "loss": 0.0144, "step": 9370, "task_loss": 0.004283284768462181 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07015059888362885, "epoch": 8.9, "learning_rate": 4.149415728509971e-05, "loss": 0.0735, "step": 9371, "task_loss": 0.10341081768274307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.17750681936740875, "epoch": 8.9, "learning_rate": 4.1486148680631875e-05, "loss": 0.189, "step": 9372, "task_loss": 0.29196876287460327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03413275629281998, "epoch": 8.9, "learning_rate": 4.147813708137574e-05, "loss": 0.0359, "step": 9373, "task_loss": 0.051412858068943024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044314898550510406, "epoch": 8.9, "learning_rate": 4.1470122488786645e-05, "loss": 0.0408, "step": 9374, "task_loss": 0.00903802365064621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03658336400985718, "epoch": 8.9, "learning_rate": 4.146210490432048e-05, "loss": 0.0348, "step": 9375, "task_loss": 0.018489893525838852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01839321479201317, "epoch": 8.9, "learning_rate": 4.1454084329433674e-05, "loss": 0.0171, "step": 9376, "task_loss": 0.005394909530878067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04376121610403061, "epoch": 8.91, "learning_rate": 4.144606076558321e-05, "loss": 0.0486, "step": 9377, "task_loss": 0.09205228835344315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02223261632025242, "epoch": 8.91, "learning_rate": 4.14380342142266e-05, "loss": 0.029, "step": 9378, "task_loss": 0.08994803577661514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029528465121984482, "epoch": 8.91, "learning_rate": 4.14300046768219e-05, "loss": 0.0361, "step": 9379, "task_loss": 0.09534671902656555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038068436086177826, "epoch": 8.91, "learning_rate": 4.1421972154827724e-05, "loss": 0.0398, "step": 9380, "task_loss": 0.055462270975112915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02015954628586769, "epoch": 8.91, "learning_rate": 4.141393664970323e-05, "loss": 0.0265, "step": 9381, "task_loss": 0.0838894322514534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02617390640079975, "epoch": 8.91, "learning_rate": 4.140589816290808e-05, "loss": 0.0296, "step": 9382, "task_loss": 0.060920245945453644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010754341259598732, "epoch": 8.91, "learning_rate": 4.1397856695902535e-05, "loss": 0.0171, "step": 9383, "task_loss": 0.07444259524345398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.052527911961078644, "epoch": 8.91, "learning_rate": 4.138981225014733e-05, "loss": 0.0651, "step": 9384, "task_loss": 0.17810016870498657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035542987287044525, "epoch": 8.91, "learning_rate": 4.1381764827103806e-05, "loss": 0.033, "step": 9385, "task_loss": 0.010187897831201553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009268272668123245, "epoch": 8.91, "learning_rate": 4.13737144282338e-05, "loss": 0.0144, "step": 9386, "task_loss": 0.06069015711545944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01877554878592491, "epoch": 8.91, "learning_rate": 4.1365661054999715e-05, "loss": 0.0176, "step": 9387, "task_loss": 0.007025185972452164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.056551381945610046, "epoch": 8.92, "learning_rate": 4.1357604708864475e-05, "loss": 0.067, "step": 9388, "task_loss": 0.16118671000003815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12249293923377991, "epoch": 8.92, "learning_rate": 4.1349545391291563e-05, "loss": 0.1238, "step": 9389, "task_loss": 0.13515231013298035 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04289904981851578, "epoch": 8.92, "learning_rate": 4.1341483103745006e-05, "loss": 0.0393, "step": 9390, "task_loss": 0.006975535303354263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047507885843515396, "epoch": 8.92, "learning_rate": 4.133341784768933e-05, "loss": 0.0585, "step": 9391, "task_loss": 0.15751934051513672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03881534934043884, "epoch": 8.92, "learning_rate": 4.132534962458962e-05, "loss": 0.0365, "step": 9392, "task_loss": 0.01544598676264286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027173154056072235, "epoch": 8.92, "learning_rate": 4.131727843591155e-05, "loss": 0.025, "step": 9393, "task_loss": 0.00519617460668087 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0583278089761734, "epoch": 8.92, "learning_rate": 4.130920428312127e-05, "loss": 0.0568, "step": 9394, "task_loss": 0.042698342353105545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1009674221277237, "epoch": 8.92, "learning_rate": 4.130112716768548e-05, "loss": 0.102, "step": 9395, "task_loss": 0.11100717633962631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07862529158592224, "epoch": 8.92, "learning_rate": 4.129304709107143e-05, "loss": 0.0764, "step": 9396, "task_loss": 0.0560125857591629 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01500142365694046, "epoch": 8.92, "learning_rate": 4.128496405474691e-05, "loss": 0.014, "step": 9397, "task_loss": 0.005422631278634071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025359123945236206, "epoch": 8.92, "learning_rate": 4.127687806018024e-05, "loss": 0.0406, "step": 9398, "task_loss": 0.1776316612958908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.061490267515182495, "epoch": 8.93, "learning_rate": 4.1268789108840275e-05, "loss": 0.0665, "step": 9399, "task_loss": 0.11160209774971008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05927234888076782, "epoch": 8.93, "learning_rate": 4.126069720219642e-05, "loss": 0.0682, "step": 9400, "task_loss": 0.14884845912456512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12741845846176147, "epoch": 8.93, "learning_rate": 4.125260234171861e-05, "loss": 0.1217, "step": 9401, "task_loss": 0.07042922079563141 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020310278981924057, "epoch": 8.93, "learning_rate": 4.12445045288773e-05, "loss": 0.0244, "step": 9402, "task_loss": 0.06154558062553406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.049872465431690216, "epoch": 8.93, "learning_rate": 4.123640376514353e-05, "loss": 0.0495, "step": 9403, "task_loss": 0.04623175412416458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018078003078699112, "epoch": 8.93, "learning_rate": 4.12283000519888e-05, "loss": 0.0304, "step": 9404, "task_loss": 0.14123806357383728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013958177529275417, "epoch": 8.93, "learning_rate": 4.122019339088522e-05, "loss": 0.0213, "step": 9405, "task_loss": 0.08724473416805267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020791584625840187, "epoch": 8.93, "learning_rate": 4.121208378330539e-05, "loss": 0.0194, "step": 9406, "task_loss": 0.006587089970707893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03328198194503784, "epoch": 8.93, "learning_rate": 4.120397123072246e-05, "loss": 0.0314, "step": 9407, "task_loss": 0.014744751155376434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017374586313962936, "epoch": 8.93, "learning_rate": 4.119585573461012e-05, "loss": 0.016, "step": 9408, "task_loss": 0.0033780932426452637 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03562571480870247, "epoch": 8.94, "learning_rate": 4.118773729644258e-05, "loss": 0.0412, "step": 9409, "task_loss": 0.09113264083862305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043064821511507034, "epoch": 8.94, "learning_rate": 4.11796159176946e-05, "loss": 0.0501, "step": 9410, "task_loss": 0.11316752433776855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08931790292263031, "epoch": 8.94, "learning_rate": 4.117149159984147e-05, "loss": 0.0858, "step": 9411, "task_loss": 0.0536871999502182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0797976404428482, "epoch": 8.94, "learning_rate": 4.116336434435901e-05, "loss": 0.0779, "step": 9412, "task_loss": 0.060566820204257965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057995155453681946, "epoch": 8.94, "learning_rate": 4.115523415272358e-05, "loss": 0.0555, "step": 9413, "task_loss": 0.033352144062519073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03625299036502838, "epoch": 8.94, "learning_rate": 4.1147101026412046e-05, "loss": 0.0428, "step": 9414, "task_loss": 0.10129890590906143 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04442795366048813, "epoch": 8.94, "learning_rate": 4.1138964966901853e-05, "loss": 0.0427, "step": 9415, "task_loss": 0.027060629799962044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01638917066156864, "epoch": 8.94, "learning_rate": 4.113082597567095e-05, "loss": 0.0203, "step": 9416, "task_loss": 0.055116184055805206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041155144572257996, "epoch": 8.94, "learning_rate": 4.112268405419782e-05, "loss": 0.0521, "step": 9417, "task_loss": 0.1504831463098526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028086768463253975, "epoch": 8.94, "learning_rate": 4.1114539203961476e-05, "loss": 0.0373, "step": 9418, "task_loss": 0.12012705206871033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026071809232234955, "epoch": 8.94, "learning_rate": 4.110639142644149e-05, "loss": 0.0252, "step": 9419, "task_loss": 0.016939345747232437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024052530527114868, "epoch": 8.95, "learning_rate": 4.109824072311792e-05, "loss": 0.0352, "step": 9420, "task_loss": 0.13542687892913818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.048306502401828766, "epoch": 8.95, "learning_rate": 4.10900870954714e-05, "loss": 0.047, "step": 9421, "task_loss": 0.03500698506832123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0342412106692791, "epoch": 8.95, "learning_rate": 4.108193054498307e-05, "loss": 0.0553, "step": 9422, "task_loss": 0.24514110386371613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013718321919441223, "epoch": 8.95, "learning_rate": 4.10737710731346e-05, "loss": 0.0214, "step": 9423, "task_loss": 0.09085668623447418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013474099338054657, "epoch": 8.95, "learning_rate": 4.106560868140821e-05, "loss": 0.0191, "step": 9424, "task_loss": 0.07011019438505173 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0738307386636734, "epoch": 8.95, "learning_rate": 4.105744337128662e-05, "loss": 0.0698, "step": 9425, "task_loss": 0.0339224673807621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04669392481446266, "epoch": 8.95, "learning_rate": 4.104927514425312e-05, "loss": 0.0546, "step": 9426, "task_loss": 0.12560123205184937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04168114811182022, "epoch": 8.95, "learning_rate": 4.104110400179148e-05, "loss": 0.039, "step": 9427, "task_loss": 0.015049118548631668 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020040128380060196, "epoch": 8.95, "learning_rate": 4.103292994538605e-05, "loss": 0.0184, "step": 9428, "task_loss": 0.00409487821161747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07207145541906357, "epoch": 8.95, "learning_rate": 4.102475297652168e-05, "loss": 0.0685, "step": 9429, "task_loss": 0.036523304879665375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01679806597530842, "epoch": 8.96, "learning_rate": 4.1016573096683765e-05, "loss": 0.0215, "step": 9430, "task_loss": 0.0642915591597557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020311608910560608, "epoch": 8.96, "learning_rate": 4.10083903073582e-05, "loss": 0.0239, "step": 9431, "task_loss": 0.05604963004589081 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02877676486968994, "epoch": 8.96, "learning_rate": 4.1000204610031447e-05, "loss": 0.0267, "step": 9432, "task_loss": 0.007656911388039589 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024545563384890556, "epoch": 8.96, "learning_rate": 4.0992016006190456e-05, "loss": 0.0326, "step": 9433, "task_loss": 0.10507547855377197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05073285847902298, "epoch": 8.96, "learning_rate": 4.0983824497322755e-05, "loss": 0.0484, "step": 9434, "task_loss": 0.027833662927150726 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027377085760235786, "epoch": 8.96, "learning_rate": 4.0975630084916344e-05, "loss": 0.0253, "step": 9435, "task_loss": 0.0062708742916584015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015949569642543793, "epoch": 8.96, "learning_rate": 4.096743277045979e-05, "loss": 0.0235, "step": 9436, "task_loss": 0.09101761877536774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06154026836156845, "epoch": 8.96, "learning_rate": 4.0959232555442174e-05, "loss": 0.0634, "step": 9437, "task_loss": 0.07965725660324097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039994481950998306, "epoch": 8.96, "learning_rate": 4.0951029441353104e-05, "loss": 0.0373, "step": 9438, "task_loss": 0.012804200872778893 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07882995158433914, "epoch": 8.96, "learning_rate": 4.094282342968271e-05, "loss": 0.0807, "step": 9439, "task_loss": 0.09789763391017914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09340311586856842, "epoch": 8.96, "learning_rate": 4.093461452192167e-05, "loss": 0.0986, "step": 9440, "task_loss": 0.1451651006937027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0292808897793293, "epoch": 8.97, "learning_rate": 4.092640271956115e-05, "loss": 0.0349, "step": 9441, "task_loss": 0.08514495939016342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1028076782822609, "epoch": 8.97, "learning_rate": 4.091818802409288e-05, "loss": 0.112, "step": 9442, "task_loss": 0.19489334523677826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10867805033922195, "epoch": 8.97, "learning_rate": 4.0909970437009096e-05, "loss": 0.1137, "step": 9443, "task_loss": 0.1586572676897049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012916838750243187, "epoch": 8.97, "learning_rate": 4.0901749959802546e-05, "loss": 0.0168, "step": 9444, "task_loss": 0.05206574872136116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1345238983631134, "epoch": 8.97, "learning_rate": 4.0893526593966535e-05, "loss": 0.1296, "step": 9445, "task_loss": 0.0848974958062172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07488267123699188, "epoch": 8.97, "learning_rate": 4.088530034099488e-05, "loss": 0.0872, "step": 9446, "task_loss": 0.1976650059223175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041900884360075, "epoch": 8.97, "learning_rate": 4.087707120238191e-05, "loss": 0.0388, "step": 9447, "task_loss": 0.010816860944032669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019380036741495132, "epoch": 8.97, "learning_rate": 4.0868839179622495e-05, "loss": 0.0188, "step": 9448, "task_loss": 0.013160983100533485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07051914930343628, "epoch": 8.97, "learning_rate": 4.086060427421202e-05, "loss": 0.0652, "step": 9449, "task_loss": 0.01760847680270672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0432322658598423, "epoch": 8.97, "learning_rate": 4.0852366487646384e-05, "loss": 0.0517, "step": 9450, "task_loss": 0.12742291390895844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027135606855154037, "epoch": 8.98, "learning_rate": 4.084412582142204e-05, "loss": 0.0364, "step": 9451, "task_loss": 0.1198822483420372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15044289827346802, "epoch": 8.98, "learning_rate": 4.083588227703593e-05, "loss": 0.1615, "step": 9452, "task_loss": 0.26102936267852783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0592607781291008, "epoch": 8.98, "learning_rate": 4.0827635855985534e-05, "loss": 0.0603, "step": 9453, "task_loss": 0.06970083713531494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09907069802284241, "epoch": 8.98, "learning_rate": 4.081938655976886e-05, "loss": 0.1052, "step": 9454, "task_loss": 0.16077247262001038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028371935710310936, "epoch": 8.98, "learning_rate": 4.0811134389884433e-05, "loss": 0.0264, "step": 9455, "task_loss": 0.008428094908595085 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0345245860517025, "epoch": 8.98, "learning_rate": 4.08028793478313e-05, "loss": 0.0411, "step": 9456, "task_loss": 0.09978184103965759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.049868617206811905, "epoch": 8.98, "learning_rate": 4.0794621435109015e-05, "loss": 0.0674, "step": 9457, "task_loss": 0.22498998045921326 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02840796858072281, "epoch": 8.98, "learning_rate": 4.0786360653217684e-05, "loss": 0.0265, "step": 9458, "task_loss": 0.009237809106707573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03639814257621765, "epoch": 8.98, "learning_rate": 4.0778097003657915e-05, "loss": 0.0344, "step": 9459, "task_loss": 0.01689404621720314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020641740411520004, "epoch": 8.98, "learning_rate": 4.0769830487930835e-05, "loss": 0.0251, "step": 9460, "task_loss": 0.0654524564743042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05284133553504944, "epoch": 8.98, "learning_rate": 4.07615611075381e-05, "loss": 0.0577, "step": 9461, "task_loss": 0.10118487477302551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08153997361660004, "epoch": 8.99, "learning_rate": 4.075328886398188e-05, "loss": 0.0832, "step": 9462, "task_loss": 0.09851934015750885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024140600115060806, "epoch": 8.99, "learning_rate": 4.074501375876487e-05, "loss": 0.0225, "step": 9463, "task_loss": 0.008064748719334602 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04541175067424774, "epoch": 8.99, "learning_rate": 4.073673579339028e-05, "loss": 0.0472, "step": 9464, "task_loss": 0.06317553669214249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10922182351350784, "epoch": 8.99, "learning_rate": 4.0728454969361854e-05, "loss": 0.104, "step": 9465, "task_loss": 0.05705301836133003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04132990166544914, "epoch": 8.99, "learning_rate": 4.0720171288183815e-05, "loss": 0.0419, "step": 9466, "task_loss": 0.04671701043844223 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0743739902973175, "epoch": 8.99, "learning_rate": 4.0711884751360964e-05, "loss": 0.0726, "step": 9467, "task_loss": 0.05616597831249237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05336568504571915, "epoch": 8.99, "learning_rate": 4.070359536039858e-05, "loss": 0.0536, "step": 9468, "task_loss": 0.055919770151376724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12312227487564087, "epoch": 8.99, "learning_rate": 4.069530311680247e-05, "loss": 0.1289, "step": 9469, "task_loss": 0.180916890501976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0300409235060215, "epoch": 8.99, "learning_rate": 4.068700802207895e-05, "loss": 0.0301, "step": 9470, "task_loss": 0.030945636332035065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059253282845020294, "epoch": 8.99, "learning_rate": 4.0678710077734885e-05, "loss": 0.0671, "step": 9471, "task_loss": 0.13786810636520386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02918911725282669, "epoch": 9.0, "learning_rate": 4.0670409285277614e-05, "loss": 0.0329, "step": 9472, "task_loss": 0.06589201092720032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1608899086713791, "epoch": 9.0, "learning_rate": 4.0662105646215034e-05, "loss": 0.1544, "step": 9473, "task_loss": 0.0955737829208374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.045890383422374725, "epoch": 9.0, "learning_rate": 4.065379916205554e-05, "loss": 0.0452, "step": 9474, "task_loss": 0.03897964581847191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0502871535718441, "epoch": 9.0, "learning_rate": 4.0645489834308024e-05, "loss": 0.0519, "step": 9475, "task_loss": 0.06652585417032242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12040196359157562, "epoch": 9.0, "learning_rate": 4.063717766448194e-05, "loss": 0.1166, "step": 9476, "task_loss": 0.08265364170074463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09221913665533066, "epoch": 9.0, "learning_rate": 4.062886265408722e-05, "loss": 0.0868, "step": 9477, "task_loss": 0.037736404687166214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08050169050693512, "epoch": 9.0, "learning_rate": 4.062054480463433e-05, "loss": 0.0768, "step": 9478, "task_loss": 0.043371133506298065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02520603872835636, "epoch": 9.0, "learning_rate": 4.0612224117634245e-05, "loss": 0.0235, "step": 9479, "task_loss": 0.00836150161921978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018169641494750977, "epoch": 9.0, "learning_rate": 4.060390059459846e-05, "loss": 0.0285, "step": 9480, "task_loss": 0.1218915730714798 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09185026586055756, "epoch": 9.0, "learning_rate": 4.059557423703899e-05, "loss": 0.0925, "step": 9481, "task_loss": 0.09815002977848053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016890790313482285, "epoch": 9.0, "learning_rate": 4.058724504646834e-05, "loss": 0.024, "step": 9482, "task_loss": 0.08820458501577377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01857912167906761, "epoch": 9.01, "learning_rate": 4.0578913024399564e-05, "loss": 0.0222, "step": 9483, "task_loss": 0.05494304373860359 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0701369047164917, "epoch": 9.01, "learning_rate": 4.057057817234621e-05, "loss": 0.0669, "step": 9484, "task_loss": 0.03729560971260071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017086252570152283, "epoch": 9.01, "learning_rate": 4.0562240491822334e-05, "loss": 0.0283, "step": 9485, "task_loss": 0.1295742690563202 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0375320240855217, "epoch": 9.01, "learning_rate": 4.055389998434253e-05, "loss": 0.0376, "step": 9486, "task_loss": 0.03832355886697769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022690830752253532, "epoch": 9.01, "learning_rate": 4.054555665142189e-05, "loss": 0.0255, "step": 9487, "task_loss": 0.05128246918320656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011978648602962494, "epoch": 9.01, "learning_rate": 4.053721049457601e-05, "loss": 0.0112, "step": 9488, "task_loss": 0.004542630165815353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014200868085026741, "epoch": 9.01, "learning_rate": 4.052886151532101e-05, "loss": 0.0218, "step": 9489, "task_loss": 0.09042149782180786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.049319952726364136, "epoch": 9.01, "learning_rate": 4.0520509715173544e-05, "loss": 0.046, "step": 9490, "task_loss": 0.016364455223083496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024487420916557312, "epoch": 9.01, "learning_rate": 4.051215509565073e-05, "loss": 0.0227, "step": 9491, "task_loss": 0.006308834999799728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018296554684638977, "epoch": 9.01, "learning_rate": 4.050379765827024e-05, "loss": 0.0169, "step": 9492, "task_loss": 0.003857152536511421 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04161164537072182, "epoch": 9.02, "learning_rate": 4.0495437404550233e-05, "loss": 0.0451, "step": 9493, "task_loss": 0.07690572738647461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022137103602290154, "epoch": 9.02, "learning_rate": 4.04870743360094e-05, "loss": 0.0273, "step": 9494, "task_loss": 0.07371386885643005 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01800430938601494, "epoch": 9.02, "learning_rate": 4.047870845416693e-05, "loss": 0.017, "step": 9495, "task_loss": 0.00811527669429779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024769477546215057, "epoch": 9.02, "learning_rate": 4.0470339760542506e-05, "loss": 0.0336, "step": 9496, "task_loss": 0.11265938729047775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05644622817635536, "epoch": 9.02, "learning_rate": 4.0461968256656376e-05, "loss": 0.055, "step": 9497, "task_loss": 0.042227305471897125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010226700454950333, "epoch": 9.02, "learning_rate": 4.045359394402925e-05, "loss": 0.0097, "step": 9498, "task_loss": 0.005248824134469032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020847581326961517, "epoch": 9.02, "learning_rate": 4.0445216824182344e-05, "loss": 0.0196, "step": 9499, "task_loss": 0.008282596245408058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11503365635871887, "epoch": 9.02, "learning_rate": 4.043683689863742e-05, "loss": 0.1119, "step": 9500, "task_loss": 0.08395393192768097 }, { "epoch": 9.02, "eval_accuracy": 0.8910550458715596, "eval_loss": 0.4567631483078003, "eval_runtime": 18.0265, "eval_samples_per_second": 48.373, "eval_steps_per_second": 6.047, "step": 9500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04205450415611267, "epoch": 9.02, "learning_rate": 4.042845416891673e-05, "loss": 0.0486, "step": 9501, "task_loss": 0.10712815821170807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06236998736858368, "epoch": 9.02, "learning_rate": 4.042006863654303e-05, "loss": 0.0662, "step": 9502, "task_loss": 0.10055098682641983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019925886765122414, "epoch": 9.02, "learning_rate": 4.041168030303961e-05, "loss": 0.0187, "step": 9503, "task_loss": 0.008159628137946129 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06345760822296143, "epoch": 9.03, "learning_rate": 4.0403289169930235e-05, "loss": 0.0664, "step": 9504, "task_loss": 0.09281642735004425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023364584892988205, "epoch": 9.03, "learning_rate": 4.03948952387392e-05, "loss": 0.0272, "step": 9505, "task_loss": 0.06141924858093262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02729855477809906, "epoch": 9.03, "learning_rate": 4.03864985109913e-05, "loss": 0.0263, "step": 9506, "task_loss": 0.016974736005067825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01486403401941061, "epoch": 9.03, "learning_rate": 4.0378098988211845e-05, "loss": 0.0137, "step": 9507, "task_loss": 0.0035839397460222244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031053446233272552, "epoch": 9.03, "learning_rate": 4.036969667192665e-05, "loss": 0.0616, "step": 9508, "task_loss": 0.3368293344974518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07761267572641373, "epoch": 9.03, "learning_rate": 4.036129156366203e-05, "loss": 0.0834, "step": 9509, "task_loss": 0.13566073775291443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1153075098991394, "epoch": 9.03, "learning_rate": 4.0352883664944816e-05, "loss": 0.11, "step": 9510, "task_loss": 0.06233564764261246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015643972903490067, "epoch": 9.03, "learning_rate": 4.034447297730234e-05, "loss": 0.0146, "step": 9511, "task_loss": 0.005513627082109451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04660728946328163, "epoch": 9.03, "learning_rate": 4.033605950226246e-05, "loss": 0.0524, "step": 9512, "task_loss": 0.10417984426021576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01021486520767212, "epoch": 9.03, "learning_rate": 4.03276432413535e-05, "loss": 0.0097, "step": 9513, "task_loss": 0.004828939214348793 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030726449564099312, "epoch": 9.04, "learning_rate": 4.0319224196104334e-05, "loss": 0.0285, "step": 9514, "task_loss": 0.00808459147810936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0746147632598877, "epoch": 9.04, "learning_rate": 4.031080236804431e-05, "loss": 0.0731, "step": 9515, "task_loss": 0.05903128162026405 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06045572832226753, "epoch": 9.04, "learning_rate": 4.03023777587033e-05, "loss": 0.073, "step": 9516, "task_loss": 0.18590320646762848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015560157597064972, "epoch": 9.04, "learning_rate": 4.029395036961168e-05, "loss": 0.0148, "step": 9517, "task_loss": 0.007637334987521172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02795090712606907, "epoch": 9.04, "learning_rate": 4.028552020230031e-05, "loss": 0.0278, "step": 9518, "task_loss": 0.026432598009705544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016976818442344666, "epoch": 9.04, "learning_rate": 4.0277087258300575e-05, "loss": 0.0156, "step": 9519, "task_loss": 0.0029955413192510605 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010076291859149933, "epoch": 9.04, "learning_rate": 4.0268651539144374e-05, "loss": 0.0094, "step": 9520, "task_loss": 0.003336844965815544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04456236958503723, "epoch": 9.04, "learning_rate": 4.026021304636408e-05, "loss": 0.0541, "step": 9521, "task_loss": 0.13946330547332764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02801411598920822, "epoch": 9.04, "learning_rate": 4.0251771781492594e-05, "loss": 0.0277, "step": 9522, "task_loss": 0.02517678588628769 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02441570535302162, "epoch": 9.04, "learning_rate": 4.0243327746063315e-05, "loss": 0.0303, "step": 9523, "task_loss": 0.08282805979251862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03433768451213837, "epoch": 9.04, "learning_rate": 4.0234880941610134e-05, "loss": 0.0363, "step": 9524, "task_loss": 0.05413848161697388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016913043335080147, "epoch": 9.05, "learning_rate": 4.022643136966746e-05, "loss": 0.026, "step": 9525, "task_loss": 0.10786878317594528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07082056999206543, "epoch": 9.05, "learning_rate": 4.021797903177019e-05, "loss": 0.0752, "step": 9526, "task_loss": 0.114901602268219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011534811928868294, "epoch": 9.05, "learning_rate": 4.0209523929453744e-05, "loss": 0.0107, "step": 9527, "task_loss": 0.002887837588787079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018662162125110626, "epoch": 9.05, "learning_rate": 4.0201066064254026e-05, "loss": 0.0173, "step": 9528, "task_loss": 0.004804331809282303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1188516914844513, "epoch": 9.05, "learning_rate": 4.019260543770745e-05, "loss": 0.1143, "step": 9529, "task_loss": 0.07374230027198792 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022575611248612404, "epoch": 9.05, "learning_rate": 4.018414205135093e-05, "loss": 0.0265, "step": 9530, "task_loss": 0.06145612150430679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012420643121004105, "epoch": 9.05, "learning_rate": 4.017567590672187e-05, "loss": 0.0117, "step": 9531, "task_loss": 0.005104459822177887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032812122255563736, "epoch": 9.05, "learning_rate": 4.01672070053582e-05, "loss": 0.0399, "step": 9532, "task_loss": 0.10327274352312088 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023092597723007202, "epoch": 9.05, "learning_rate": 4.015873534879833e-05, "loss": 0.0245, "step": 9533, "task_loss": 0.03673432022333145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037473469972610474, "epoch": 9.05, "learning_rate": 4.015026093858119e-05, "loss": 0.0472, "step": 9534, "task_loss": 0.13475218415260315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05353183299303055, "epoch": 9.06, "learning_rate": 4.014178377624617e-05, "loss": 0.0514, "step": 9535, "task_loss": 0.032544951885938644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02789144590497017, "epoch": 9.06, "learning_rate": 4.013330386333321e-05, "loss": 0.0303, "step": 9536, "task_loss": 0.052158139646053314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009651558473706245, "epoch": 9.06, "learning_rate": 4.012482120138272e-05, "loss": 0.0091, "step": 9537, "task_loss": 0.003987642005085945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0357995480298996, "epoch": 9.06, "learning_rate": 4.011633579193561e-05, "loss": 0.0406, "step": 9538, "task_loss": 0.08331019431352615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017858661711215973, "epoch": 9.06, "learning_rate": 4.010784763653331e-05, "loss": 0.0252, "step": 9539, "task_loss": 0.09111450612545013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06131261587142944, "epoch": 9.06, "learning_rate": 4.0099356736717725e-05, "loss": 0.0596, "step": 9540, "task_loss": 0.04372568055987358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021268391981720924, "epoch": 9.06, "learning_rate": 4.0090863094031274e-05, "loss": 0.0294, "step": 9541, "task_loss": 0.10256145894527435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030995316803455353, "epoch": 9.06, "learning_rate": 4.008236671001686e-05, "loss": 0.0286, "step": 9542, "task_loss": 0.007312217727303505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016386138275265694, "epoch": 9.06, "learning_rate": 4.0073867586217895e-05, "loss": 0.0153, "step": 9543, "task_loss": 0.005437880754470825 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02716147154569626, "epoch": 9.06, "learning_rate": 4.006536572417828e-05, "loss": 0.0256, "step": 9544, "task_loss": 0.011989755555987358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03949110209941864, "epoch": 9.06, "learning_rate": 4.0056861125442435e-05, "loss": 0.0435, "step": 9545, "task_loss": 0.07948499917984009 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.060796599835157394, "epoch": 9.07, "learning_rate": 4.004835379155525e-05, "loss": 0.0645, "step": 9546, "task_loss": 0.09797711670398712 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00800246186554432, "epoch": 9.07, "learning_rate": 4.003984372406212e-05, "loss": 0.0078, "step": 9547, "task_loss": 0.0055456701666116714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01699589565396309, "epoch": 9.07, "learning_rate": 4.003133092450895e-05, "loss": 0.0158, "step": 9548, "task_loss": 0.005228973925113678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0232367105782032, "epoch": 9.07, "learning_rate": 4.002281539444213e-05, "loss": 0.03, "step": 9549, "task_loss": 0.09088429808616638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027070309966802597, "epoch": 9.07, "learning_rate": 4.001429713540853e-05, "loss": 0.0249, "step": 9550, "task_loss": 0.005381651222705841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030805271118879318, "epoch": 9.07, "learning_rate": 4.000577614895555e-05, "loss": 0.031, "step": 9551, "task_loss": 0.03288976103067398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15201908349990845, "epoch": 9.07, "learning_rate": 3.999725243663107e-05, "loss": 0.1577, "step": 9552, "task_loss": 0.2092512547969818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03263195604085922, "epoch": 9.07, "learning_rate": 3.9988725999983456e-05, "loss": 0.0373, "step": 9553, "task_loss": 0.07890980690717697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13403475284576416, "epoch": 9.07, "learning_rate": 3.998019684056158e-05, "loss": 0.1317, "step": 9554, "task_loss": 0.11034642159938812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04724042862653732, "epoch": 9.07, "learning_rate": 3.99716649599148e-05, "loss": 0.0483, "step": 9555, "task_loss": 0.058092519640922546 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0314975380897522, "epoch": 9.08, "learning_rate": 3.996313035959297e-05, "loss": 0.037, "step": 9556, "task_loss": 0.0861077755689621 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01337359193712473, "epoch": 9.08, "learning_rate": 3.995459304114645e-05, "loss": 0.0126, "step": 9557, "task_loss": 0.005379321053624153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02194923162460327, "epoch": 9.08, "learning_rate": 3.9946053006126086e-05, "loss": 0.0286, "step": 9558, "task_loss": 0.08856151252985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021245867013931274, "epoch": 9.08, "learning_rate": 3.993751025608321e-05, "loss": 0.0271, "step": 9559, "task_loss": 0.0795937329530716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025511398911476135, "epoch": 9.08, "learning_rate": 3.9928964792569655e-05, "loss": 0.0235, "step": 9560, "task_loss": 0.005805861204862595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018126703798770905, "epoch": 9.08, "learning_rate": 3.9920416617137745e-05, "loss": 0.017, "step": 9561, "task_loss": 0.007210768759250641 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018573222681879997, "epoch": 9.08, "learning_rate": 3.9911865731340306e-05, "loss": 0.0233, "step": 9562, "task_loss": 0.06605537980794907 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023824790492653847, "epoch": 9.08, "learning_rate": 3.9903312136730634e-05, "loss": 0.0321, "step": 9563, "task_loss": 0.10653051733970642 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10854936391115189, "epoch": 9.08, "learning_rate": 3.989475583486254e-05, "loss": 0.105, "step": 9564, "task_loss": 0.07346247136592865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019099000841379166, "epoch": 9.08, "learning_rate": 3.988619682729032e-05, "loss": 0.0268, "step": 9565, "task_loss": 0.09585027396678925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021908055990934372, "epoch": 9.08, "learning_rate": 3.987763511556874e-05, "loss": 0.0296, "step": 9566, "task_loss": 0.09896925091743469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013894637115299702, "epoch": 9.09, "learning_rate": 3.98690707012531e-05, "loss": 0.0128, "step": 9567, "task_loss": 0.0032347403466701508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03824283555150032, "epoch": 9.09, "learning_rate": 3.986050358589916e-05, "loss": 0.0488, "step": 9568, "task_loss": 0.14340779185295105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03835334628820419, "epoch": 9.09, "learning_rate": 3.9851933771063166e-05, "loss": 0.0349, "step": 9569, "task_loss": 0.003623614087700844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03116435743868351, "epoch": 9.09, "learning_rate": 3.9843361258301876e-05, "loss": 0.0385, "step": 9570, "task_loss": 0.10502012073993683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04906230419874191, "epoch": 9.09, "learning_rate": 3.983478604917253e-05, "loss": 0.0574, "step": 9571, "task_loss": 0.13289552927017212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04502825438976288, "epoch": 9.09, "learning_rate": 3.9826208145232855e-05, "loss": 0.0679, "step": 9572, "task_loss": 0.273649126291275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036221615970134735, "epoch": 9.09, "learning_rate": 3.981762754804107e-05, "loss": 0.0414, "step": 9573, "task_loss": 0.08760883659124374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05786776542663574, "epoch": 9.09, "learning_rate": 3.980904425915586e-05, "loss": 0.0567, "step": 9574, "task_loss": 0.04585399478673935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0210392028093338, "epoch": 9.09, "learning_rate": 3.9800458280136453e-05, "loss": 0.0198, "step": 9575, "task_loss": 0.008328957483172417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047202907502651215, "epoch": 9.09, "learning_rate": 3.979186961254252e-05, "loss": 0.0497, "step": 9576, "task_loss": 0.07255647331476212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02497425302863121, "epoch": 9.09, "learning_rate": 3.9783278257934233e-05, "loss": 0.0233, "step": 9577, "task_loss": 0.00862952508032322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031044363975524902, "epoch": 9.1, "learning_rate": 3.977468421787225e-05, "loss": 0.0361, "step": 9578, "task_loss": 0.08184700459241867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023872077465057373, "epoch": 9.1, "learning_rate": 3.976608749391773e-05, "loss": 0.0291, "step": 9579, "task_loss": 0.0763431191444397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021440522745251656, "epoch": 9.1, "learning_rate": 3.975748808763229e-05, "loss": 0.0202, "step": 9580, "task_loss": 0.009024685248732567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017536599189043045, "epoch": 9.1, "learning_rate": 3.974888600057808e-05, "loss": 0.0187, "step": 9581, "task_loss": 0.029022112488746643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03331207111477852, "epoch": 9.1, "learning_rate": 3.974028123431769e-05, "loss": 0.0486, "step": 9582, "task_loss": 0.1862955540418625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01485717948526144, "epoch": 9.1, "learning_rate": 3.973167379041421e-05, "loss": 0.0142, "step": 9583, "task_loss": 0.0080320555716753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03911569342017174, "epoch": 9.1, "learning_rate": 3.972306367043126e-05, "loss": 0.043, "step": 9584, "task_loss": 0.07806295156478882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1487078070640564, "epoch": 9.1, "learning_rate": 3.971445087593288e-05, "loss": 0.1381, "step": 9585, "task_loss": 0.04280809685587883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01248634047806263, "epoch": 9.1, "learning_rate": 3.970583540848363e-05, "loss": 0.0155, "step": 9586, "task_loss": 0.04234500974416733 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015881624072790146, "epoch": 9.1, "learning_rate": 3.969721726964856e-05, "loss": 0.0236, "step": 9587, "task_loss": 0.09326736629009247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028029238805174828, "epoch": 9.11, "learning_rate": 3.9688596460993176e-05, "loss": 0.0332, "step": 9588, "task_loss": 0.07947677373886108 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018526611849665642, "epoch": 9.11, "learning_rate": 3.967997298408352e-05, "loss": 0.0217, "step": 9589, "task_loss": 0.04995952919125557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02319607511162758, "epoch": 9.11, "learning_rate": 3.967134684048607e-05, "loss": 0.0304, "step": 9590, "task_loss": 0.09573376178741455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03988368809223175, "epoch": 9.11, "learning_rate": 3.96627180317678e-05, "loss": 0.0397, "step": 9591, "task_loss": 0.03814127668738365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030341364443302155, "epoch": 9.11, "learning_rate": 3.965408655949619e-05, "loss": 0.0277, "step": 9592, "task_loss": 0.004290319979190826 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018847042694687843, "epoch": 9.11, "learning_rate": 3.964545242523917e-05, "loss": 0.0174, "step": 9593, "task_loss": 0.004647746682167053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05705046281218529, "epoch": 9.11, "learning_rate": 3.9636815630565194e-05, "loss": 0.055, "step": 9594, "task_loss": 0.03697717562317848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02136809565126896, "epoch": 9.11, "learning_rate": 3.962817617704317e-05, "loss": 0.0265, "step": 9595, "task_loss": 0.07274884730577469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.2957301437854767, "epoch": 9.11, "learning_rate": 3.9619534066242485e-05, "loss": 0.281, "step": 9596, "task_loss": 0.14882266521453857 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018483705818653107, "epoch": 9.11, "learning_rate": 3.961088929973303e-05, "loss": 0.0266, "step": 9597, "task_loss": 0.09919779002666473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031163640320301056, "epoch": 9.11, "learning_rate": 3.960224187908518e-05, "loss": 0.0309, "step": 9598, "task_loss": 0.028584491461515427 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0134575255215168, "epoch": 9.12, "learning_rate": 3.959359180586975e-05, "loss": 0.0128, "step": 9599, "task_loss": 0.006391104310750961 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020069237798452377, "epoch": 9.12, "learning_rate": 3.958493908165809e-05, "loss": 0.0185, "step": 9600, "task_loss": 0.004250597208738327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04818892478942871, "epoch": 9.12, "learning_rate": 3.9576283708022e-05, "loss": 0.0446, "step": 9601, "task_loss": 0.012091221287846565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06722578406333923, "epoch": 9.12, "learning_rate": 3.956762568653378e-05, "loss": 0.0683, "step": 9602, "task_loss": 0.07760586589574814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021520771086215973, "epoch": 9.12, "learning_rate": 3.95589650187662e-05, "loss": 0.0263, "step": 9603, "task_loss": 0.06911582499742508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07241514325141907, "epoch": 9.12, "learning_rate": 3.95503017062925e-05, "loss": 0.0696, "step": 9604, "task_loss": 0.04403474181890488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0638948380947113, "epoch": 9.12, "learning_rate": 3.954163575068643e-05, "loss": 0.0619, "step": 9605, "task_loss": 0.04400301352143288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014446464367210865, "epoch": 9.12, "learning_rate": 3.953296715352218e-05, "loss": 0.0136, "step": 9606, "task_loss": 0.0059725940227508545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016270997002720833, "epoch": 9.12, "learning_rate": 3.952429591637446e-05, "loss": 0.0242, "step": 9607, "task_loss": 0.095741406083107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025010449811816216, "epoch": 9.12, "learning_rate": 3.951562204081845e-05, "loss": 0.0283, "step": 9608, "task_loss": 0.057582780718803406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07090044021606445, "epoch": 9.13, "learning_rate": 3.950694552842977e-05, "loss": 0.0913, "step": 9609, "task_loss": 0.2748509645462036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022294355556368828, "epoch": 9.13, "learning_rate": 3.949826638078457e-05, "loss": 0.0367, "step": 9610, "task_loss": 0.16597087681293488 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02015480399131775, "epoch": 9.13, "learning_rate": 3.948958459945946e-05, "loss": 0.0314, "step": 9611, "task_loss": 0.13252711296081543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06596086174249649, "epoch": 9.13, "learning_rate": 3.948090018603153e-05, "loss": 0.0796, "step": 9612, "task_loss": 0.20231035351753235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018903814256191254, "epoch": 9.13, "learning_rate": 3.947221314207834e-05, "loss": 0.0329, "step": 9613, "task_loss": 0.15914104878902435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03513910248875618, "epoch": 9.13, "learning_rate": 3.9463523469177935e-05, "loss": 0.0369, "step": 9614, "task_loss": 0.05302010476589203 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04065433889627457, "epoch": 9.13, "learning_rate": 3.9454831168908824e-05, "loss": 0.0387, "step": 9615, "task_loss": 0.021478936076164246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02802848257124424, "epoch": 9.13, "learning_rate": 3.9446136242850025e-05, "loss": 0.0257, "step": 9616, "task_loss": 0.004691721871495247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09828861057758331, "epoch": 9.13, "learning_rate": 3.9437438692581e-05, "loss": 0.0998, "step": 9617, "task_loss": 0.11373132467269897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05762851983308792, "epoch": 9.13, "learning_rate": 3.9428738519681704e-05, "loss": 0.0569, "step": 9618, "task_loss": 0.04999490827322006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03731340914964676, "epoch": 9.13, "learning_rate": 3.942003572573257e-05, "loss": 0.037, "step": 9619, "task_loss": 0.0341169647872448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023080751299858093, "epoch": 9.14, "learning_rate": 3.94113303123145e-05, "loss": 0.0346, "step": 9620, "task_loss": 0.13836701214313507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014194730669260025, "epoch": 9.14, "learning_rate": 3.9402622281008874e-05, "loss": 0.0199, "step": 9621, "task_loss": 0.07124481350183487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05054982379078865, "epoch": 9.14, "learning_rate": 3.939391163339754e-05, "loss": 0.0559, "step": 9622, "task_loss": 0.10361889749765396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014216311275959015, "epoch": 9.14, "learning_rate": 3.9385198371062845e-05, "loss": 0.0241, "step": 9623, "task_loss": 0.11282174289226532 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014683485962450504, "epoch": 9.14, "learning_rate": 3.937648249558758e-05, "loss": 0.0136, "step": 9624, "task_loss": 0.0039719510823488235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016595659777522087, "epoch": 9.14, "learning_rate": 3.9367764008555034e-05, "loss": 0.0156, "step": 9625, "task_loss": 0.006236037239432335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015016734600067139, "epoch": 9.14, "learning_rate": 3.9359042911548955e-05, "loss": 0.014, "step": 9626, "task_loss": 0.005279116332530975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012623798102140427, "epoch": 9.14, "learning_rate": 3.935031920615358e-05, "loss": 0.016, "step": 9627, "task_loss": 0.04630003124475479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05201908200979233, "epoch": 9.14, "learning_rate": 3.934159289395361e-05, "loss": 0.0515, "step": 9628, "task_loss": 0.04690450802445412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02428932674229145, "epoch": 9.14, "learning_rate": 3.9332863976534225e-05, "loss": 0.0316, "step": 9629, "task_loss": 0.0969204306602478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03481846675276756, "epoch": 9.15, "learning_rate": 3.9324132455481064e-05, "loss": 0.0322, "step": 9630, "task_loss": 0.008588599041104317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011342018842697144, "epoch": 9.15, "learning_rate": 3.931539833238026e-05, "loss": 0.0157, "step": 9631, "task_loss": 0.05512590333819389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04356681555509567, "epoch": 9.15, "learning_rate": 3.930666160881841e-05, "loss": 0.0502, "step": 9632, "task_loss": 0.10991165041923523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03675740957260132, "epoch": 9.15, "learning_rate": 3.9297922286382573e-05, "loss": 0.0385, "step": 9633, "task_loss": 0.05395536124706268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012938008643686771, "epoch": 9.15, "learning_rate": 3.928918036666029e-05, "loss": 0.0123, "step": 9634, "task_loss": 0.006980478763580322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038404569029808044, "epoch": 9.15, "learning_rate": 3.928043585123957e-05, "loss": 0.0375, "step": 9635, "task_loss": 0.029215719550848007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04972174018621445, "epoch": 9.15, "learning_rate": 3.927168874170891e-05, "loss": 0.0643, "step": 9636, "task_loss": 0.19559511542320251 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0464179627597332, "epoch": 9.15, "learning_rate": 3.926293903965726e-05, "loss": 0.0465, "step": 9637, "task_loss": 0.04701643064618111 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06579018384218216, "epoch": 9.15, "learning_rate": 3.925418674667405e-05, "loss": 0.0745, "step": 9638, "task_loss": 0.1530209332704544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04052596539258957, "epoch": 9.15, "learning_rate": 3.924543186434915e-05, "loss": 0.0394, "step": 9639, "task_loss": 0.029542801901698112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06306673586368561, "epoch": 9.15, "learning_rate": 3.923667439427295e-05, "loss": 0.066, "step": 9640, "task_loss": 0.09248776733875275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04041620343923569, "epoch": 9.16, "learning_rate": 3.922791433803629e-05, "loss": 0.0383, "step": 9641, "task_loss": 0.019168421626091003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015475798398256302, "epoch": 9.16, "learning_rate": 3.921915169723046e-05, "loss": 0.0145, "step": 9642, "task_loss": 0.005340602248907089 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12808610498905182, "epoch": 9.16, "learning_rate": 3.921038647344725e-05, "loss": 0.1262, "step": 9643, "task_loss": 0.10924281924962997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04638529568910599, "epoch": 9.16, "learning_rate": 3.920161866827889e-05, "loss": 0.0483, "step": 9644, "task_loss": 0.06601787358522415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13820701837539673, "epoch": 9.16, "learning_rate": 3.9192848283318114e-05, "loss": 0.1419, "step": 9645, "task_loss": 0.17469622194766998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0257248692214489, "epoch": 9.16, "learning_rate": 3.91840753201581e-05, "loss": 0.0321, "step": 9646, "task_loss": 0.08985202014446259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057688090950250626, "epoch": 9.16, "learning_rate": 3.917529978039247e-05, "loss": 0.0599, "step": 9647, "task_loss": 0.079569973051548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02995416149497032, "epoch": 9.16, "learning_rate": 3.9166521665615386e-05, "loss": 0.0362, "step": 9648, "task_loss": 0.09234865754842758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014706909656524658, "epoch": 9.16, "learning_rate": 3.91577409774214e-05, "loss": 0.0136, "step": 9649, "task_loss": 0.0036916378885507584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04012545198202133, "epoch": 9.16, "learning_rate": 3.9148957717405596e-05, "loss": 0.0443, "step": 9650, "task_loss": 0.08144651353359222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03406708687543869, "epoch": 9.17, "learning_rate": 3.914017188716347e-05, "loss": 0.0402, "step": 9651, "task_loss": 0.09540209174156189 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022918783128261566, "epoch": 9.17, "learning_rate": 3.913138348829102e-05, "loss": 0.0348, "step": 9652, "task_loss": 0.14145193994045258 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013142495416104794, "epoch": 9.17, "learning_rate": 3.91225925223847e-05, "loss": 0.0121, "step": 9653, "task_loss": 0.0028307754546403885 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036516182124614716, "epoch": 9.17, "learning_rate": 3.911379899104144e-05, "loss": 0.0346, "step": 9654, "task_loss": 0.016904115676879883 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04557647556066513, "epoch": 9.17, "learning_rate": 3.910500289585862e-05, "loss": 0.045, "step": 9655, "task_loss": 0.04023095220327377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01984507404267788, "epoch": 9.17, "learning_rate": 3.90962042384341e-05, "loss": 0.0186, "step": 9656, "task_loss": 0.0074311550706624985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0343051478266716, "epoch": 9.17, "learning_rate": 3.908740302036618e-05, "loss": 0.0397, "step": 9657, "task_loss": 0.08872069418430328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024765335023403168, "epoch": 9.17, "learning_rate": 3.907859924325366e-05, "loss": 0.0381, "step": 9658, "task_loss": 0.15835905075073242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12249832600355148, "epoch": 9.17, "learning_rate": 3.906979290869578e-05, "loss": 0.1308, "step": 9659, "task_loss": 0.20595303177833557 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012475738301873207, "epoch": 9.17, "learning_rate": 3.9060984018292267e-05, "loss": 0.0118, "step": 9660, "task_loss": 0.0056333523243665695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023209767416119576, "epoch": 9.17, "learning_rate": 3.905217257364328e-05, "loss": 0.0386, "step": 9661, "task_loss": 0.17690522968769073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04662572965025902, "epoch": 9.18, "learning_rate": 3.904335857634948e-05, "loss": 0.0445, "step": 9662, "task_loss": 0.0256513562053442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04135457053780556, "epoch": 9.18, "learning_rate": 3.9034542028011944e-05, "loss": 0.0443, "step": 9663, "task_loss": 0.07109534740447998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036145225167274475, "epoch": 9.18, "learning_rate": 3.902572293023227e-05, "loss": 0.0458, "step": 9664, "task_loss": 0.13244393467903137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03712350130081177, "epoch": 9.18, "learning_rate": 3.9016901284612474e-05, "loss": 0.0569, "step": 9665, "task_loss": 0.23489037156105042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011418421752750874, "epoch": 9.18, "learning_rate": 3.9008077092755055e-05, "loss": 0.0162, "step": 9666, "task_loss": 0.058784160763025284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08879618346691132, "epoch": 9.18, "learning_rate": 3.899925035626296e-05, "loss": 0.0833, "step": 9667, "task_loss": 0.03357243910431862 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10656376928091049, "epoch": 9.18, "learning_rate": 3.899042107673962e-05, "loss": 0.1155, "step": 9668, "task_loss": 0.19606941938400269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02124224603176117, "epoch": 9.18, "learning_rate": 3.898158925578893e-05, "loss": 0.0316, "step": 9669, "task_loss": 0.12506425380706787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023364577442407608, "epoch": 9.18, "learning_rate": 3.89727548950152e-05, "loss": 0.0221, "step": 9670, "task_loss": 0.01049577072262764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.054638948291540146, "epoch": 9.18, "learning_rate": 3.8963917996023245e-05, "loss": 0.0538, "step": 9671, "task_loss": 0.04668677970767021 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03254498541355133, "epoch": 9.19, "learning_rate": 3.8955078560418345e-05, "loss": 0.0449, "step": 9672, "task_loss": 0.15559810400009155 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020883910357952118, "epoch": 9.19, "learning_rate": 3.894623658980622e-05, "loss": 0.0193, "step": 9673, "task_loss": 0.005325049161911011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1067768931388855, "epoch": 9.19, "learning_rate": 3.8937392085793036e-05, "loss": 0.1221, "step": 9674, "task_loss": 0.2597641348838806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018741261214017868, "epoch": 9.19, "learning_rate": 3.892854504998546e-05, "loss": 0.029, "step": 9675, "task_loss": 0.12132866680622101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011632721871137619, "epoch": 9.19, "learning_rate": 3.891969548399061e-05, "loss": 0.0203, "step": 9676, "task_loss": 0.09824814647436142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017414584755897522, "epoch": 9.19, "learning_rate": 3.891084338941603e-05, "loss": 0.0162, "step": 9677, "task_loss": 0.00522034615278244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012747041881084442, "epoch": 9.19, "learning_rate": 3.8901988767869744e-05, "loss": 0.0118, "step": 9678, "task_loss": 0.0028500892221927643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.055840395390987396, "epoch": 9.19, "learning_rate": 3.8893131620960254e-05, "loss": 0.057, "step": 9679, "task_loss": 0.06704328209161758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03462572395801544, "epoch": 9.19, "learning_rate": 3.88842719502965e-05, "loss": 0.0367, "step": 9680, "task_loss": 0.05576511472463608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.050681374967098236, "epoch": 9.19, "learning_rate": 3.887540975748787e-05, "loss": 0.0543, "step": 9681, "task_loss": 0.08639328181743622 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029797300696372986, "epoch": 9.19, "learning_rate": 3.8866545044144234e-05, "loss": 0.0337, "step": 9682, "task_loss": 0.06878768652677536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014516827650368214, "epoch": 9.2, "learning_rate": 3.885767781187591e-05, "loss": 0.0236, "step": 9683, "task_loss": 0.10563208907842636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013084099628031254, "epoch": 9.2, "learning_rate": 3.884880806229367e-05, "loss": 0.0139, "step": 9684, "task_loss": 0.020864391699433327 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022157765924930573, "epoch": 9.2, "learning_rate": 3.883993579700875e-05, "loss": 0.0206, "step": 9685, "task_loss": 0.006276823580265045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059577975422143936, "epoch": 9.2, "learning_rate": 3.883106101763285e-05, "loss": 0.0571, "step": 9686, "task_loss": 0.03433872014284134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04622458666563034, "epoch": 9.2, "learning_rate": 3.882218372577809e-05, "loss": 0.06, "step": 9687, "task_loss": 0.18399886786937714 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030847573652863503, "epoch": 9.2, "learning_rate": 3.881330392305709e-05, "loss": 0.0407, "step": 9688, "task_loss": 0.1292884647846222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10208854079246521, "epoch": 9.2, "learning_rate": 3.8804421611082916e-05, "loss": 0.1025, "step": 9689, "task_loss": 0.10571222007274628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07040687650442123, "epoch": 9.2, "learning_rate": 3.8795536791469066e-05, "loss": 0.0801, "step": 9690, "task_loss": 0.16708990931510925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022444941103458405, "epoch": 9.2, "learning_rate": 3.8786649465829516e-05, "loss": 0.0288, "step": 9691, "task_loss": 0.08632338047027588 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014007965102791786, "epoch": 9.2, "learning_rate": 3.8777759635778696e-05, "loss": 0.0129, "step": 9692, "task_loss": 0.003078687936067581 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03719889000058174, "epoch": 9.21, "learning_rate": 3.876886730293149e-05, "loss": 0.0361, "step": 9693, "task_loss": 0.0266589242964983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01961357146501541, "epoch": 9.21, "learning_rate": 3.8759972468903215e-05, "loss": 0.0255, "step": 9694, "task_loss": 0.07875269651412964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021031856536865234, "epoch": 9.21, "learning_rate": 3.875107513530968e-05, "loss": 0.0209, "step": 9695, "task_loss": 0.01938748173415661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0264435987919569, "epoch": 9.21, "learning_rate": 3.874217530376711e-05, "loss": 0.0375, "step": 9696, "task_loss": 0.13744567334651947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07807254046201706, "epoch": 9.21, "learning_rate": 3.873327297589223e-05, "loss": 0.0742, "step": 9697, "task_loss": 0.03972852602601051 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024312328547239304, "epoch": 9.21, "learning_rate": 3.8724368153302166e-05, "loss": 0.0274, "step": 9698, "task_loss": 0.055370982736349106 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01522772666066885, "epoch": 9.21, "learning_rate": 3.871546083761453e-05, "loss": 0.0185, "step": 9699, "task_loss": 0.04807128384709358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10017004609107971, "epoch": 9.21, "learning_rate": 3.870655103044738e-05, "loss": 0.1018, "step": 9700, "task_loss": 0.11637624353170395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04961914196610451, "epoch": 9.21, "learning_rate": 3.8697638733419216e-05, "loss": 0.0533, "step": 9701, "task_loss": 0.086252860724926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06641680747270584, "epoch": 9.21, "learning_rate": 3.8688723948149014e-05, "loss": 0.0691, "step": 9702, "task_loss": 0.0931624248623848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025418289005756378, "epoch": 9.21, "learning_rate": 3.867980667625618e-05, "loss": 0.0309, "step": 9703, "task_loss": 0.07991056889295578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017918124794960022, "epoch": 9.22, "learning_rate": 3.867088691936058e-05, "loss": 0.0227, "step": 9704, "task_loss": 0.06607376039028168 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051767975091934204, "epoch": 9.22, "learning_rate": 3.8661964679082535e-05, "loss": 0.0597, "step": 9705, "task_loss": 0.13116154074668884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030053768306970596, "epoch": 9.22, "learning_rate": 3.8653039957042806e-05, "loss": 0.0285, "step": 9706, "task_loss": 0.014347141608595848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010007740929722786, "epoch": 9.22, "learning_rate": 3.8644112754862614e-05, "loss": 0.0093, "step": 9707, "task_loss": 0.002756282687187195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03509853407740593, "epoch": 9.22, "learning_rate": 3.8635183074163636e-05, "loss": 0.038, "step": 9708, "task_loss": 0.06392055749893188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02037603035569191, "epoch": 9.22, "learning_rate": 3.862625091656797e-05, "loss": 0.0211, "step": 9709, "task_loss": 0.02797846868634224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04154079779982567, "epoch": 9.22, "learning_rate": 3.861731628369822e-05, "loss": 0.0651, "step": 9710, "task_loss": 0.2771263122558594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028145533055067062, "epoch": 9.22, "learning_rate": 3.8608379177177375e-05, "loss": 0.042, "step": 9711, "task_loss": 0.1668614000082016 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018877774477005005, "epoch": 9.22, "learning_rate": 3.8599439598628916e-05, "loss": 0.0314, "step": 9712, "task_loss": 0.14435160160064697 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.128778338432312, "epoch": 9.22, "learning_rate": 3.8590497549676753e-05, "loss": 0.1268, "step": 9713, "task_loss": 0.1085672602057457 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02545616589486599, "epoch": 9.23, "learning_rate": 3.858155303194526e-05, "loss": 0.0276, "step": 9714, "task_loss": 0.0471004918217659 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025828856974840164, "epoch": 9.23, "learning_rate": 3.8572606047059254e-05, "loss": 0.0246, "step": 9715, "task_loss": 0.01345803216099739 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018282314762473106, "epoch": 9.23, "learning_rate": 3.856365659664399e-05, "loss": 0.0225, "step": 9716, "task_loss": 0.06006855517625809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0179976187646389, "epoch": 9.23, "learning_rate": 3.855470468232518e-05, "loss": 0.0237, "step": 9717, "task_loss": 0.07452400773763657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017134424299001694, "epoch": 9.23, "learning_rate": 3.854575030572898e-05, "loss": 0.0229, "step": 9718, "task_loss": 0.0750466138124466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015787985175848007, "epoch": 9.23, "learning_rate": 3.853679346848201e-05, "loss": 0.0209, "step": 9719, "task_loss": 0.06708446890115738 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023376762866973877, "epoch": 9.23, "learning_rate": 3.8527834172211306e-05, "loss": 0.0362, "step": 9720, "task_loss": 0.15146197378635406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017823493108153343, "epoch": 9.23, "learning_rate": 3.851887241854438e-05, "loss": 0.0172, "step": 9721, "task_loss": 0.011508205905556679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01219139527529478, "epoch": 9.23, "learning_rate": 3.850990820910917e-05, "loss": 0.0114, "step": 9722, "task_loss": 0.003914957866072655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021136948838829994, "epoch": 9.23, "learning_rate": 3.8500941545534065e-05, "loss": 0.0259, "step": 9723, "task_loss": 0.06834466755390167 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05498252436518669, "epoch": 9.23, "learning_rate": 3.849197242944791e-05, "loss": 0.0571, "step": 9724, "task_loss": 0.07627555727958679 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03645971044898033, "epoch": 9.24, "learning_rate": 3.8483000862479986e-05, "loss": 0.0411, "step": 9725, "task_loss": 0.08248203247785568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023510009050369263, "epoch": 9.24, "learning_rate": 3.8474026846260015e-05, "loss": 0.0419, "step": 9726, "task_loss": 0.20696833729743958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037108682096004486, "epoch": 9.24, "learning_rate": 3.846505038241818e-05, "loss": 0.0336, "step": 9727, "task_loss": 0.0021884366869926453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05788455903530121, "epoch": 9.24, "learning_rate": 3.84560714725851e-05, "loss": 0.0531, "step": 9728, "task_loss": 0.009828178212046623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01626589521765709, "epoch": 9.24, "learning_rate": 3.8447090118391814e-05, "loss": 0.0339, "step": 9729, "task_loss": 0.19226785004138947 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044995106756687164, "epoch": 9.24, "learning_rate": 3.8438106321469864e-05, "loss": 0.0445, "step": 9730, "task_loss": 0.03956909477710724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018232377246022224, "epoch": 9.24, "learning_rate": 3.842912008345117e-05, "loss": 0.0229, "step": 9731, "task_loss": 0.0647912323474884 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01168222725391388, "epoch": 9.24, "learning_rate": 3.842013140596815e-05, "loss": 0.0143, "step": 9732, "task_loss": 0.0380130261182785 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011876760981976986, "epoch": 9.24, "learning_rate": 3.841114029065362e-05, "loss": 0.011, "step": 9733, "task_loss": 0.003131164237856865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05818028748035431, "epoch": 9.24, "learning_rate": 3.8402146739140874e-05, "loss": 0.0713, "step": 9734, "task_loss": 0.1889064908027649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016233699396252632, "epoch": 9.25, "learning_rate": 3.8393150753063614e-05, "loss": 0.0202, "step": 9735, "task_loss": 0.05590314790606499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06847208738327026, "epoch": 9.25, "learning_rate": 3.838415233405603e-05, "loss": 0.0667, "step": 9736, "task_loss": 0.050349440425634384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022901874035596848, "epoch": 9.25, "learning_rate": 3.837515148375271e-05, "loss": 0.021, "step": 9737, "task_loss": 0.003996755927801132 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02838345803320408, "epoch": 9.25, "learning_rate": 3.836614820378871e-05, "loss": 0.0341, "step": 9738, "task_loss": 0.08570387959480286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016883879899978638, "epoch": 9.25, "learning_rate": 3.835714249579952e-05, "loss": 0.0167, "step": 9739, "task_loss": 0.015314383432269096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03494098410010338, "epoch": 9.25, "learning_rate": 3.8348134361421064e-05, "loss": 0.0337, "step": 9740, "task_loss": 0.022901371121406555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016907619312405586, "epoch": 9.25, "learning_rate": 3.8339123802289716e-05, "loss": 0.0157, "step": 9741, "task_loss": 0.00507272407412529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043327562510967255, "epoch": 9.25, "learning_rate": 3.8330110820042285e-05, "loss": 0.0487, "step": 9742, "task_loss": 0.09731481969356537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12834414839744568, "epoch": 9.25, "learning_rate": 3.8321095416316024e-05, "loss": 0.136, "step": 9743, "task_loss": 0.20463687181472778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032464127987623215, "epoch": 9.25, "learning_rate": 3.831207759274863e-05, "loss": 0.0303, "step": 9744, "task_loss": 0.011053359135985374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022374771535396576, "epoch": 9.25, "learning_rate": 3.8303057350978224e-05, "loss": 0.0216, "step": 9745, "task_loss": 0.014581706374883652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.045660026371479034, "epoch": 9.26, "learning_rate": 3.829403469264339e-05, "loss": 0.0425, "step": 9746, "task_loss": 0.013828214257955551 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023104814812541008, "epoch": 9.26, "learning_rate": 3.828500961938313e-05, "loss": 0.0377, "step": 9747, "task_loss": 0.1689058542251587 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010014375671744347, "epoch": 9.26, "learning_rate": 3.827598213283688e-05, "loss": 0.0095, "step": 9748, "task_loss": 0.004870768636465073 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11848193407058716, "epoch": 9.26, "learning_rate": 3.8266952234644545e-05, "loss": 0.1292, "step": 9749, "task_loss": 0.22540438175201416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12723466753959656, "epoch": 9.26, "learning_rate": 3.825791992644644e-05, "loss": 0.1362, "step": 9750, "task_loss": 0.21650384366512299 }, { "epoch": 9.26, "eval_accuracy": 0.9013761467889908, "eval_loss": 0.4256434738636017, "eval_runtime": 17.9197, "eval_samples_per_second": 48.662, "eval_steps_per_second": 6.083, "step": 9750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02083965763449669, "epoch": 9.26, "learning_rate": 3.824888520988333e-05, "loss": 0.0192, "step": 9751, "task_loss": 0.004488172009587288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025724343955516815, "epoch": 9.26, "learning_rate": 3.823984808659641e-05, "loss": 0.0374, "step": 9752, "task_loss": 0.14217713475227356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012440409511327744, "epoch": 9.26, "learning_rate": 3.8230808558227335e-05, "loss": 0.0116, "step": 9753, "task_loss": 0.0036589261144399643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009372575208544731, "epoch": 9.26, "learning_rate": 3.8221766626418155e-05, "loss": 0.009, "step": 9754, "task_loss": 0.005342619493603706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032062187790870667, "epoch": 9.26, "learning_rate": 3.821272229281139e-05, "loss": 0.0349, "step": 9755, "task_loss": 0.06090862303972244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05444857478141785, "epoch": 9.26, "learning_rate": 3.820367555904999e-05, "loss": 0.0574, "step": 9756, "task_loss": 0.08438717573881149 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02243727631866932, "epoch": 9.27, "learning_rate": 3.819462642677733e-05, "loss": 0.0375, "step": 9757, "task_loss": 0.17298638820648193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019842922687530518, "epoch": 9.27, "learning_rate": 3.818557489763724e-05, "loss": 0.0347, "step": 9758, "task_loss": 0.16849234700202942 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014697995036840439, "epoch": 9.27, "learning_rate": 3.817652097327396e-05, "loss": 0.0137, "step": 9759, "task_loss": 0.004293628036975861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0161375030875206, "epoch": 9.27, "learning_rate": 3.81674646553322e-05, "loss": 0.0251, "step": 9760, "task_loss": 0.10576816648244858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025952016934752464, "epoch": 9.27, "learning_rate": 3.815840594545706e-05, "loss": 0.0432, "step": 9761, "task_loss": 0.19875141978263855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030038634315133095, "epoch": 9.27, "learning_rate": 3.814934484529411e-05, "loss": 0.0387, "step": 9762, "task_loss": 0.11658079922199249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1559152901172638, "epoch": 9.27, "learning_rate": 3.8140281356489346e-05, "loss": 0.1487, "step": 9763, "task_loss": 0.08386341482400894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02892562374472618, "epoch": 9.27, "learning_rate": 3.8131215480689184e-05, "loss": 0.0478, "step": 9764, "task_loss": 0.21779313683509827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021166235208511353, "epoch": 9.27, "learning_rate": 3.812214721954049e-05, "loss": 0.0218, "step": 9765, "task_loss": 0.027633680030703545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021776802837848663, "epoch": 9.27, "learning_rate": 3.811307657469055e-05, "loss": 0.0203, "step": 9766, "task_loss": 0.007460303604602814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022118225693702698, "epoch": 9.28, "learning_rate": 3.8104003547787105e-05, "loss": 0.0211, "step": 9767, "task_loss": 0.011582162231206894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012279561720788479, "epoch": 9.28, "learning_rate": 3.809492814047831e-05, "loss": 0.0181, "step": 9768, "task_loss": 0.07050621509552002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022169344127178192, "epoch": 9.28, "learning_rate": 3.8085850354412745e-05, "loss": 0.0205, "step": 9769, "task_loss": 0.005162643268704414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032863132655620575, "epoch": 9.28, "learning_rate": 3.807677019123944e-05, "loss": 0.0387, "step": 9770, "task_loss": 0.09106673300266266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0191971268504858, "epoch": 9.28, "learning_rate": 3.806768765260785e-05, "loss": 0.021, "step": 9771, "task_loss": 0.037707261741161346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01197386346757412, "epoch": 9.28, "learning_rate": 3.805860274016787e-05, "loss": 0.0118, "step": 9772, "task_loss": 0.00973932072520256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024311240762472153, "epoch": 9.28, "learning_rate": 3.8049515455569816e-05, "loss": 0.0404, "step": 9773, "task_loss": 0.1852773129940033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01635361835360527, "epoch": 9.28, "learning_rate": 3.804042580046442e-05, "loss": 0.0222, "step": 9774, "task_loss": 0.07465977221727371 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023430045694112778, "epoch": 9.28, "learning_rate": 3.803133377650288e-05, "loss": 0.0294, "step": 9775, "task_loss": 0.08316051959991455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01706859841942787, "epoch": 9.28, "learning_rate": 3.80222393853368e-05, "loss": 0.0314, "step": 9776, "task_loss": 0.1608736664056778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01352263055741787, "epoch": 9.28, "learning_rate": 3.801314262861822e-05, "loss": 0.0127, "step": 9777, "task_loss": 0.005207255482673645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039057813584804535, "epoch": 9.29, "learning_rate": 3.800404350799961e-05, "loss": 0.036, "step": 9778, "task_loss": 0.008711079135537148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016933217644691467, "epoch": 9.29, "learning_rate": 3.799494202513386e-05, "loss": 0.0157, "step": 9779, "task_loss": 0.004555156454443932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07299914956092834, "epoch": 9.29, "learning_rate": 3.798583818167432e-05, "loss": 0.0931, "step": 9780, "task_loss": 0.27392470836639404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012407921254634857, "epoch": 9.29, "learning_rate": 3.797673197927473e-05, "loss": 0.0119, "step": 9781, "task_loss": 0.006941312924027443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019861984997987747, "epoch": 9.29, "learning_rate": 3.796762341958927e-05, "loss": 0.0185, "step": 9782, "task_loss": 0.0062296707183122635 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03457757458090782, "epoch": 9.29, "learning_rate": 3.795851250427257e-05, "loss": 0.0449, "step": 9783, "task_loss": 0.1378023773431778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07008830457925797, "epoch": 9.29, "learning_rate": 3.794939923497967e-05, "loss": 0.0876, "step": 9784, "task_loss": 0.2450743168592453 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020512813702225685, "epoch": 9.29, "learning_rate": 3.794028361336603e-05, "loss": 0.0237, "step": 9785, "task_loss": 0.05278385803103447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05949783697724342, "epoch": 9.29, "learning_rate": 3.793116564108754e-05, "loss": 0.0719, "step": 9786, "task_loss": 0.1833191066980362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012088644318282604, "epoch": 9.29, "learning_rate": 3.7922045319800545e-05, "loss": 0.0161, "step": 9787, "task_loss": 0.052241381257772446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0391070693731308, "epoch": 9.3, "learning_rate": 3.7912922651161783e-05, "loss": 0.0505, "step": 9788, "task_loss": 0.15337170660495758 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01692778989672661, "epoch": 9.3, "learning_rate": 3.790379763682844e-05, "loss": 0.0157, "step": 9789, "task_loss": 0.004635356366634369 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15806353092193604, "epoch": 9.3, "learning_rate": 3.7894670278458096e-05, "loss": 0.152, "step": 9790, "task_loss": 0.09754717350006104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12203462421894073, "epoch": 9.3, "learning_rate": 3.7885540577708804e-05, "loss": 0.1233, "step": 9791, "task_loss": 0.13422806560993195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07689625769853592, "epoch": 9.3, "learning_rate": 3.7876408536239006e-05, "loss": 0.0828, "step": 9792, "task_loss": 0.1364186704158783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03227274864912033, "epoch": 9.3, "learning_rate": 3.7867274155707585e-05, "loss": 0.0374, "step": 9793, "task_loss": 0.08388587832450867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03347526490688324, "epoch": 9.3, "learning_rate": 3.7858137437773845e-05, "loss": 0.0504, "step": 9794, "task_loss": 0.2030281275510788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02590757980942726, "epoch": 9.3, "learning_rate": 3.784899838409751e-05, "loss": 0.0291, "step": 9795, "task_loss": 0.05813925340771675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024779632687568665, "epoch": 9.3, "learning_rate": 3.783985699633874e-05, "loss": 0.0318, "step": 9796, "task_loss": 0.09515906125307083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12447907775640488, "epoch": 9.3, "learning_rate": 3.783071327615811e-05, "loss": 0.1319, "step": 9797, "task_loss": 0.19891595840454102 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06262899935245514, "epoch": 9.3, "learning_rate": 3.7821567225216615e-05, "loss": 0.0599, "step": 9798, "task_loss": 0.035038989037275314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03002050705254078, "epoch": 9.31, "learning_rate": 3.781241884517569e-05, "loss": 0.0338, "step": 9799, "task_loss": 0.06747624278068542 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04927913099527359, "epoch": 9.31, "learning_rate": 3.780326813769717e-05, "loss": 0.0507, "step": 9800, "task_loss": 0.06330694258213043 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03860379010438919, "epoch": 9.31, "learning_rate": 3.779411510444334e-05, "loss": 0.0354, "step": 9801, "task_loss": 0.006218938156962395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028020869940519333, "epoch": 9.31, "learning_rate": 3.778495974707688e-05, "loss": 0.0354, "step": 9802, "task_loss": 0.10227955132722855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008688807487487793, "epoch": 9.31, "learning_rate": 3.7775802067260905e-05, "loss": 0.0106, "step": 9803, "task_loss": 0.02761073224246502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.15427309274673462, "epoch": 9.31, "learning_rate": 3.776664206665896e-05, "loss": 0.1546, "step": 9804, "task_loss": 0.15730072557926178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02751828357577324, "epoch": 9.31, "learning_rate": 3.7757479746935e-05, "loss": 0.0351, "step": 9805, "task_loss": 0.10342276096343994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02257220633327961, "epoch": 9.31, "learning_rate": 3.77483151097534e-05, "loss": 0.0333, "step": 9806, "task_loss": 0.1300220787525177 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16032102704048157, "epoch": 9.31, "learning_rate": 3.773914815677897e-05, "loss": 0.154, "step": 9807, "task_loss": 0.09661975502967834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039088841527700424, "epoch": 9.31, "learning_rate": 3.7729978889676915e-05, "loss": 0.0461, "step": 9808, "task_loss": 0.10870218276977539 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020086947828531265, "epoch": 9.32, "learning_rate": 3.7720807310112896e-05, "loss": 0.0218, "step": 9809, "task_loss": 0.03744789958000183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012433727271854877, "epoch": 9.32, "learning_rate": 3.7711633419752954e-05, "loss": 0.0208, "step": 9810, "task_loss": 0.09626266360282898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020316528156399727, "epoch": 9.32, "learning_rate": 3.7702457220263595e-05, "loss": 0.0277, "step": 9811, "task_loss": 0.09421400725841522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06558854132890701, "epoch": 9.32, "learning_rate": 3.76932787133117e-05, "loss": 0.0822, "step": 9812, "task_loss": 0.23205044865608215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05739504098892212, "epoch": 9.32, "learning_rate": 3.768409790056459e-05, "loss": 0.0695, "step": 9813, "task_loss": 0.1781667172908783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09154346585273743, "epoch": 9.32, "learning_rate": 3.7674914783690006e-05, "loss": 0.0877, "step": 9814, "task_loss": 0.05328264459967613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04409680515527725, "epoch": 9.32, "learning_rate": 3.7665729364356115e-05, "loss": 0.0559, "step": 9815, "task_loss": 0.16190242767333984 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027593711391091347, "epoch": 9.32, "learning_rate": 3.7656541644231494e-05, "loss": 0.0265, "step": 9816, "task_loss": 0.01616600714623928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.042008671909570694, "epoch": 9.32, "learning_rate": 3.764735162498512e-05, "loss": 0.0398, "step": 9817, "task_loss": 0.01970026269555092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08180496841669083, "epoch": 9.32, "learning_rate": 3.763815930828641e-05, "loss": 0.0787, "step": 9818, "task_loss": 0.05091682821512222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019493555650115013, "epoch": 9.32, "learning_rate": 3.76289646958052e-05, "loss": 0.0245, "step": 9819, "task_loss": 0.06980308145284653 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04548114538192749, "epoch": 9.33, "learning_rate": 3.761976778921173e-05, "loss": 0.0504, "step": 9820, "task_loss": 0.09444499015808105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0342935174703598, "epoch": 9.33, "learning_rate": 3.761056859017667e-05, "loss": 0.0368, "step": 9821, "task_loss": 0.05982755869626999 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029260210692882538, "epoch": 9.33, "learning_rate": 3.7601367100371085e-05, "loss": 0.0385, "step": 9822, "task_loss": 0.12197436392307281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02063072845339775, "epoch": 9.33, "learning_rate": 3.759216332146649e-05, "loss": 0.0332, "step": 9823, "task_loss": 0.1467389613389969 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026403427124023438, "epoch": 9.33, "learning_rate": 3.7582957255134765e-05, "loss": 0.0356, "step": 9824, "task_loss": 0.11809330433607101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009067846462130547, "epoch": 9.33, "learning_rate": 3.7573748903048266e-05, "loss": 0.0086, "step": 9825, "task_loss": 0.004426542669534683 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0586862787604332, "epoch": 9.33, "learning_rate": 3.756453826687972e-05, "loss": 0.0689, "step": 9826, "task_loss": 0.1604919135570526 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1495116949081421, "epoch": 9.33, "learning_rate": 3.755532534830229e-05, "loss": 0.1435, "step": 9827, "task_loss": 0.08945684134960175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.056340716779232025, "epoch": 9.33, "learning_rate": 3.7546110148989535e-05, "loss": 0.0687, "step": 9828, "task_loss": 0.1797863095998764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03908500820398331, "epoch": 9.33, "learning_rate": 3.7536892670615454e-05, "loss": 0.044, "step": 9829, "task_loss": 0.0877419114112854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012538935989141464, "epoch": 9.34, "learning_rate": 3.752767291485444e-05, "loss": 0.0195, "step": 9830, "task_loss": 0.08169474452733994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0177429411560297, "epoch": 9.34, "learning_rate": 3.7518450883381306e-05, "loss": 0.017, "step": 9831, "task_loss": 0.010471796616911888 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012262466363608837, "epoch": 9.34, "learning_rate": 3.750922657787128e-05, "loss": 0.0298, "step": 9832, "task_loss": 0.18793505430221558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12192486971616745, "epoch": 9.34, "learning_rate": 3.7500000000000003e-05, "loss": 0.1285, "step": 9833, "task_loss": 0.18805371224880219 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01544223167002201, "epoch": 9.34, "learning_rate": 3.7490771151443525e-05, "loss": 0.0146, "step": 9834, "task_loss": 0.006540972739458084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028887242078781128, "epoch": 9.34, "learning_rate": 3.748154003387831e-05, "loss": 0.0273, "step": 9835, "task_loss": 0.01312171295285225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014016509056091309, "epoch": 9.34, "learning_rate": 3.7472306648981235e-05, "loss": 0.0132, "step": 9836, "task_loss": 0.0060823168605566025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059632837772369385, "epoch": 9.34, "learning_rate": 3.746307099842959e-05, "loss": 0.055, "step": 9837, "task_loss": 0.01375570334494114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008659922517836094, "epoch": 9.34, "learning_rate": 3.745383308390108e-05, "loss": 0.0153, "step": 9838, "task_loss": 0.07544966042041779 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038622692227363586, "epoch": 9.34, "learning_rate": 3.74445929070738e-05, "loss": 0.0406, "step": 9839, "task_loss": 0.05818815529346466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05248776823282242, "epoch": 9.34, "learning_rate": 3.74353504696263e-05, "loss": 0.054, "step": 9840, "task_loss": 0.06735575944185257 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0463816337287426, "epoch": 9.35, "learning_rate": 3.742610577323749e-05, "loss": 0.0579, "step": 9841, "task_loss": 0.16190406680107117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04839491844177246, "epoch": 9.35, "learning_rate": 3.7416858819586724e-05, "loss": 0.0574, "step": 9842, "task_loss": 0.1388835310935974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0757160484790802, "epoch": 9.35, "learning_rate": 3.740760961035375e-05, "loss": 0.0747, "step": 9843, "task_loss": 0.06535409390926361 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02673790417611599, "epoch": 9.35, "learning_rate": 3.739835814721874e-05, "loss": 0.0248, "step": 9844, "task_loss": 0.007372252643108368 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018942799419164658, "epoch": 9.35, "learning_rate": 3.738910443186226e-05, "loss": 0.0175, "step": 9845, "task_loss": 0.004348035901784897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020467443391680717, "epoch": 9.35, "learning_rate": 3.737984846596528e-05, "loss": 0.0203, "step": 9846, "task_loss": 0.018603015691041946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020978864282369614, "epoch": 9.35, "learning_rate": 3.737059025120922e-05, "loss": 0.02, "step": 9847, "task_loss": 0.01105569303035736 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06590712815523148, "epoch": 9.35, "learning_rate": 3.7361329789275855e-05, "loss": 0.0767, "step": 9848, "task_loss": 0.1739407181739807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0286872461438179, "epoch": 9.35, "learning_rate": 3.7352067081847405e-05, "loss": 0.0266, "step": 9849, "task_loss": 0.007965076714754105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08515746146440506, "epoch": 9.35, "learning_rate": 3.734280213060649e-05, "loss": 0.0818, "step": 9850, "task_loss": 0.05148651823401451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02829767018556595, "epoch": 9.36, "learning_rate": 3.7333534937236105e-05, "loss": 0.0399, "step": 9851, "task_loss": 0.14469148218631744 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0619497075676918, "epoch": 9.36, "learning_rate": 3.7324265503419716e-05, "loss": 0.0694, "step": 9852, "task_loss": 0.13681992888450623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02087024413049221, "epoch": 9.36, "learning_rate": 3.731499383084114e-05, "loss": 0.0201, "step": 9853, "task_loss": 0.012817522510886192 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05007238686084747, "epoch": 9.36, "learning_rate": 3.730571992118462e-05, "loss": 0.0491, "step": 9854, "task_loss": 0.040530577301979065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01564808376133442, "epoch": 9.36, "learning_rate": 3.7296443776134814e-05, "loss": 0.0155, "step": 9855, "task_loss": 0.013979200273752213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01755111664533615, "epoch": 9.36, "learning_rate": 3.7287165397376775e-05, "loss": 0.0163, "step": 9856, "task_loss": 0.004699693992733955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09714722633361816, "epoch": 9.36, "learning_rate": 3.727788478659597e-05, "loss": 0.106, "step": 9857, "task_loss": 0.18595141172409058 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0596044659614563, "epoch": 9.36, "learning_rate": 3.726860194547826e-05, "loss": 0.0652, "step": 9858, "task_loss": 0.11533902585506439 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009165867231786251, "epoch": 9.36, "learning_rate": 3.725931687570992e-05, "loss": 0.0224, "step": 9859, "task_loss": 0.1417846381664276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008022119291126728, "epoch": 9.36, "learning_rate": 3.7250029578977625e-05, "loss": 0.0076, "step": 9860, "task_loss": 0.004271337762475014 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1454535573720932, "epoch": 9.36, "learning_rate": 3.724074005696847e-05, "loss": 0.1585, "step": 9861, "task_loss": 0.2754858136177063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0446578785777092, "epoch": 9.37, "learning_rate": 3.723144831136992e-05, "loss": 0.0508, "step": 9862, "task_loss": 0.10627266019582748 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009835630655288696, "epoch": 9.37, "learning_rate": 3.722215434386988e-05, "loss": 0.0124, "step": 9863, "task_loss": 0.03548569977283478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07582224905490875, "epoch": 9.37, "learning_rate": 3.721285815615665e-05, "loss": 0.088, "step": 9864, "task_loss": 0.19718605279922485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02364090085029602, "epoch": 9.37, "learning_rate": 3.7203559749918904e-05, "loss": 0.0319, "step": 9865, "task_loss": 0.10592564195394516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06448841094970703, "epoch": 9.37, "learning_rate": 3.7194259126845764e-05, "loss": 0.0599, "step": 9866, "task_loss": 0.018569234758615494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018728742375969887, "epoch": 9.37, "learning_rate": 3.7184956288626724e-05, "loss": 0.0172, "step": 9867, "task_loss": 0.00367145799100399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0083607267588377, "epoch": 9.37, "learning_rate": 3.71756512369517e-05, "loss": 0.008, "step": 9868, "task_loss": 0.004354575648903847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02431337907910347, "epoch": 9.37, "learning_rate": 3.716634397351097e-05, "loss": 0.0261, "step": 9869, "task_loss": 0.04231826961040497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028572488576173782, "epoch": 9.37, "learning_rate": 3.715703449999528e-05, "loss": 0.0353, "step": 9870, "task_loss": 0.09551827609539032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015134245157241821, "epoch": 9.37, "learning_rate": 3.7147722818095724e-05, "loss": 0.0236, "step": 9871, "task_loss": 0.09958845376968384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01670273393392563, "epoch": 9.38, "learning_rate": 3.713840892950381e-05, "loss": 0.0327, "step": 9872, "task_loss": 0.17647552490234375 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07219713926315308, "epoch": 9.38, "learning_rate": 3.712909283591145e-05, "loss": 0.0823, "step": 9873, "task_loss": 0.17350198328495026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0580780953168869, "epoch": 9.38, "learning_rate": 3.7119774539010967e-05, "loss": 0.0533, "step": 9874, "task_loss": 0.010585742071270943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03432053327560425, "epoch": 9.38, "learning_rate": 3.711045404049507e-05, "loss": 0.0325, "step": 9875, "task_loss": 0.016367098316550255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014983313158154488, "epoch": 9.38, "learning_rate": 3.710113134205687e-05, "loss": 0.0138, "step": 9876, "task_loss": 0.002722267061471939 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04396282881498337, "epoch": 9.38, "learning_rate": 3.709180644538988e-05, "loss": 0.048, "step": 9877, "task_loss": 0.08390352874994278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014825180172920227, "epoch": 9.38, "learning_rate": 3.708247935218802e-05, "loss": 0.0202, "step": 9878, "task_loss": 0.06829116493463516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1081385537981987, "epoch": 9.38, "learning_rate": 3.707315006414559e-05, "loss": 0.1184, "step": 9879, "task_loss": 0.21026736497879028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06750968843698502, "epoch": 9.38, "learning_rate": 3.706381858295731e-05, "loss": 0.0675, "step": 9880, "task_loss": 0.06727669388055801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02159801311790943, "epoch": 9.38, "learning_rate": 3.705448491031829e-05, "loss": 0.0277, "step": 9881, "task_loss": 0.08293137699365616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07070346176624298, "epoch": 9.38, "learning_rate": 3.7045149047924016e-05, "loss": 0.071, "step": 9882, "task_loss": 0.07330311834812164 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014572136104106903, "epoch": 9.39, "learning_rate": 3.703581099747041e-05, "loss": 0.0231, "step": 9883, "task_loss": 0.09984834492206573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.055414482951164246, "epoch": 9.39, "learning_rate": 3.702647076065378e-05, "loss": 0.0554, "step": 9884, "task_loss": 0.055326469242572784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025832070037722588, "epoch": 9.39, "learning_rate": 3.701712833917082e-05, "loss": 0.035, "step": 9885, "task_loss": 0.1178750991821289 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013981124386191368, "epoch": 9.39, "learning_rate": 3.700778373471861e-05, "loss": 0.0131, "step": 9886, "task_loss": 0.005626115947961807 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024125682190060616, "epoch": 9.39, "learning_rate": 3.699843694899467e-05, "loss": 0.0283, "step": 9887, "task_loss": 0.06636115163564682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022186098620295525, "epoch": 9.39, "learning_rate": 3.698908798369686e-05, "loss": 0.0206, "step": 9888, "task_loss": 0.006147833541035652 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09722729027271271, "epoch": 9.39, "learning_rate": 3.697973684052347e-05, "loss": 0.1004, "step": 9889, "task_loss": 0.12891815602779388 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.006281149573624134, "epoch": 9.39, "learning_rate": 3.697038352117321e-05, "loss": 0.0182, "step": 9890, "task_loss": 0.12592127919197083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014334081672132015, "epoch": 9.39, "learning_rate": 3.6961028027345114e-05, "loss": 0.024, "step": 9891, "task_loss": 0.11108017712831497 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07208122313022614, "epoch": 9.39, "learning_rate": 3.695167036073868e-05, "loss": 0.0769, "step": 9892, "task_loss": 0.12063595652580261 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04806242883205414, "epoch": 9.4, "learning_rate": 3.694231052305376e-05, "loss": 0.0582, "step": 9893, "task_loss": 0.14957112073898315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022140424698591232, "epoch": 9.4, "learning_rate": 3.693294851599063e-05, "loss": 0.0354, "step": 9894, "task_loss": 0.1551881581544876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017354466021060944, "epoch": 9.4, "learning_rate": 3.692358434124992e-05, "loss": 0.021, "step": 9895, "task_loss": 0.05346723645925522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02076183259487152, "epoch": 9.4, "learning_rate": 3.69142180005327e-05, "loss": 0.0196, "step": 9896, "task_loss": 0.009420402348041534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0411936454474926, "epoch": 9.4, "learning_rate": 3.69048494955404e-05, "loss": 0.0466, "step": 9897, "task_loss": 0.09573011100292206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03919178247451782, "epoch": 9.4, "learning_rate": 3.689547882797485e-05, "loss": 0.0358, "step": 9898, "task_loss": 0.005710486322641373 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0696905106306076, "epoch": 9.4, "learning_rate": 3.688610599953828e-05, "loss": 0.0671, "step": 9899, "task_loss": 0.0438590869307518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07281279563903809, "epoch": 9.4, "learning_rate": 3.6876731011933316e-05, "loss": 0.0817, "step": 9900, "task_loss": 0.1620171219110489 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016692060977220535, "epoch": 9.4, "learning_rate": 3.686735386686296e-05, "loss": 0.0153, "step": 9901, "task_loss": 0.002423325553536415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03054118901491165, "epoch": 9.4, "learning_rate": 3.685797456603062e-05, "loss": 0.0343, "step": 9902, "task_loss": 0.06791390478610992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013188479468226433, "epoch": 9.4, "learning_rate": 3.684859311114009e-05, "loss": 0.0183, "step": 9903, "task_loss": 0.06404251605272293 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030764251947402954, "epoch": 9.41, "learning_rate": 3.6839209503895566e-05, "loss": 0.0468, "step": 9904, "task_loss": 0.19135428965091705 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024435993283987045, "epoch": 9.41, "learning_rate": 3.6829823746001616e-05, "loss": 0.03, "step": 9905, "task_loss": 0.08053772151470184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03574752062559128, "epoch": 9.41, "learning_rate": 3.6820435839163205e-05, "loss": 0.04, "step": 9906, "task_loss": 0.07870674878358841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13980330526828766, "epoch": 9.41, "learning_rate": 3.68110457850857e-05, "loss": 0.1499, "step": 9907, "task_loss": 0.24049611389636993 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0633138120174408, "epoch": 9.41, "learning_rate": 3.680165358547484e-05, "loss": 0.0682, "step": 9908, "task_loss": 0.1119156926870346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03244736045598984, "epoch": 9.41, "learning_rate": 3.6792259242036776e-05, "loss": 0.0359, "step": 9909, "task_loss": 0.06734733283519745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020193520933389664, "epoch": 9.41, "learning_rate": 3.678286275647802e-05, "loss": 0.0188, "step": 9910, "task_loss": 0.006022298708558083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01735522225499153, "epoch": 9.41, "learning_rate": 3.677346413050551e-05, "loss": 0.0171, "step": 9911, "task_loss": 0.014364780858159065 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03136299550533295, "epoch": 9.41, "learning_rate": 3.6764063365826525e-05, "loss": 0.0348, "step": 9912, "task_loss": 0.06544046103954315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03162465989589691, "epoch": 9.41, "learning_rate": 3.675466046414878e-05, "loss": 0.0339, "step": 9913, "task_loss": 0.05433046445250511 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028096985071897507, "epoch": 9.42, "learning_rate": 3.674525542718035e-05, "loss": 0.0265, "step": 9914, "task_loss": 0.012334998697042465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10660208761692047, "epoch": 9.42, "learning_rate": 3.6735848256629705e-05, "loss": 0.1089, "step": 9915, "task_loss": 0.12940451502799988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05087193101644516, "epoch": 9.42, "learning_rate": 3.6726438954205714e-05, "loss": 0.06, "step": 9916, "task_loss": 0.14205744862556458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.035260528326034546, "epoch": 9.42, "learning_rate": 3.6717027521617595e-05, "loss": 0.0326, "step": 9917, "task_loss": 0.009002592414617538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036381468176841736, "epoch": 9.42, "learning_rate": 3.6707613960575006e-05, "loss": 0.0375, "step": 9918, "task_loss": 0.048002809286117554 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01758906990289688, "epoch": 9.42, "learning_rate": 3.669819827278795e-05, "loss": 0.0164, "step": 9919, "task_loss": 0.005666827782988548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05025162547826767, "epoch": 9.42, "learning_rate": 3.668878045996685e-05, "loss": 0.0592, "step": 9920, "task_loss": 0.13993430137634277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014506649225950241, "epoch": 9.42, "learning_rate": 3.667936052382248e-05, "loss": 0.0134, "step": 9921, "task_loss": 0.0035849660634994507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0744752436876297, "epoch": 9.42, "learning_rate": 3.666993846606602e-05, "loss": 0.0755, "step": 9922, "task_loss": 0.0845727026462555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037520673125982285, "epoch": 9.42, "learning_rate": 3.666051428840904e-05, "loss": 0.0409, "step": 9923, "task_loss": 0.07118186354637146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007355409674346447, "epoch": 9.42, "learning_rate": 3.665108799256348e-05, "loss": 0.007, "step": 9924, "task_loss": 0.003476981073617935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012398531660437584, "epoch": 9.43, "learning_rate": 3.6641659580241665e-05, "loss": 0.0276, "step": 9925, "task_loss": 0.16440680623054504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10659578442573547, "epoch": 9.43, "learning_rate": 3.663222905315633e-05, "loss": 0.1168, "step": 9926, "task_loss": 0.20838633179664612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06712566316127777, "epoch": 9.43, "learning_rate": 3.662279641302056e-05, "loss": 0.076, "step": 9927, "task_loss": 0.15624761581420898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05237956717610359, "epoch": 9.43, "learning_rate": 3.6613361661547854e-05, "loss": 0.052, "step": 9928, "task_loss": 0.04885098338127136 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05890313908457756, "epoch": 9.43, "learning_rate": 3.660392480045206e-05, "loss": 0.0631, "step": 9929, "task_loss": 0.10126684606075287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020466038957238197, "epoch": 9.43, "learning_rate": 3.659448583144745e-05, "loss": 0.0355, "step": 9930, "task_loss": 0.1712527871131897 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01883137971162796, "epoch": 9.43, "learning_rate": 3.658504475624865e-05, "loss": 0.0175, "step": 9931, "task_loss": 0.005616925656795502 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019618559628725052, "epoch": 9.43, "learning_rate": 3.657560157657067e-05, "loss": 0.0181, "step": 9932, "task_loss": 0.003942342475056648 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022781796753406525, "epoch": 9.43, "learning_rate": 3.656615629412892e-05, "loss": 0.0382, "step": 9933, "task_loss": 0.17709828913211823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021544065326452255, "epoch": 9.43, "learning_rate": 3.655670891063917e-05, "loss": 0.0356, "step": 9934, "task_loss": 0.1624692678451538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037996262311935425, "epoch": 9.43, "learning_rate": 3.6547259427817595e-05, "loss": 0.0421, "step": 9935, "task_loss": 0.07946674525737762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06063329428434372, "epoch": 9.44, "learning_rate": 3.6537807847380726e-05, "loss": 0.0579, "step": 9936, "task_loss": 0.03335261717438698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03863832727074623, "epoch": 9.44, "learning_rate": 3.65283541710455e-05, "loss": 0.046, "step": 9937, "task_loss": 0.11236982047557831 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01804421842098236, "epoch": 9.44, "learning_rate": 3.6518898400529214e-05, "loss": 0.0172, "step": 9938, "task_loss": 0.009426100179553032 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03564087301492691, "epoch": 9.44, "learning_rate": 3.650944053754956e-05, "loss": 0.0408, "step": 9939, "task_loss": 0.08679479360580444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037948817014694214, "epoch": 9.44, "learning_rate": 3.6499980583824606e-05, "loss": 0.0351, "step": 9940, "task_loss": 0.009799247607588768 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020474538207054138, "epoch": 9.44, "learning_rate": 3.64905185410728e-05, "loss": 0.0258, "step": 9941, "task_loss": 0.07416295260190964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07244652509689331, "epoch": 9.44, "learning_rate": 3.6481054411012946e-05, "loss": 0.0706, "step": 9942, "task_loss": 0.05382921174168587 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03393717110157013, "epoch": 9.44, "learning_rate": 3.647158819536427e-05, "loss": 0.0355, "step": 9943, "task_loss": 0.04978133365511894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02722102776169777, "epoch": 9.44, "learning_rate": 3.646211989584635e-05, "loss": 0.0334, "step": 9944, "task_loss": 0.08923061192035675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0162800382822752, "epoch": 9.44, "learning_rate": 3.645264951417915e-05, "loss": 0.0222, "step": 9945, "task_loss": 0.0759543627500534 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012943320907652378, "epoch": 9.45, "learning_rate": 3.644317705208301e-05, "loss": 0.0122, "step": 9946, "task_loss": 0.005832251161336899 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06934761255979538, "epoch": 9.45, "learning_rate": 3.643370251127865e-05, "loss": 0.0795, "step": 9947, "task_loss": 0.17082899808883667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03814961761236191, "epoch": 9.45, "learning_rate": 3.6424225893487166e-05, "loss": 0.0484, "step": 9948, "task_loss": 0.140329971909523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03574157506227493, "epoch": 9.45, "learning_rate": 3.641474720043002e-05, "loss": 0.0353, "step": 9949, "task_loss": 0.03175017610192299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06168286129832268, "epoch": 9.45, "learning_rate": 3.6405266433829075e-05, "loss": 0.0607, "step": 9950, "task_loss": 0.05187152698636055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.033880721777677536, "epoch": 9.45, "learning_rate": 3.639578359540655e-05, "loss": 0.0376, "step": 9951, "task_loss": 0.07117892801761627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019045129418373108, "epoch": 9.45, "learning_rate": 3.638629868688506e-05, "loss": 0.0257, "step": 9952, "task_loss": 0.08584122359752655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024328162893652916, "epoch": 9.45, "learning_rate": 3.6376811709987574e-05, "loss": 0.0236, "step": 9953, "task_loss": 0.016677698120474815 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05967606231570244, "epoch": 9.45, "learning_rate": 3.636732266643745e-05, "loss": 0.0675, "step": 9954, "task_loss": 0.13744193315505981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09701327234506607, "epoch": 9.45, "learning_rate": 3.635783155795841e-05, "loss": 0.1019, "step": 9955, "task_loss": 0.14588984847068787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022994887083768845, "epoch": 9.45, "learning_rate": 3.634833838627458e-05, "loss": 0.0332, "step": 9956, "task_loss": 0.12463461607694626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021844923496246338, "epoch": 9.46, "learning_rate": 3.6338843153110424e-05, "loss": 0.0288, "step": 9957, "task_loss": 0.09174901992082596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04523449391126633, "epoch": 9.46, "learning_rate": 3.63293458601908e-05, "loss": 0.0436, "step": 9958, "task_loss": 0.028626611456274986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015888435766100883, "epoch": 9.46, "learning_rate": 3.631984650924094e-05, "loss": 0.0149, "step": 9959, "task_loss": 0.006228139623999596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04058516398072243, "epoch": 9.46, "learning_rate": 3.631034510198643e-05, "loss": 0.0452, "step": 9960, "task_loss": 0.08676454424858093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028549985960125923, "epoch": 9.46, "learning_rate": 3.630084164015328e-05, "loss": 0.0261, "step": 9961, "task_loss": 0.004079824313521385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05375894159078598, "epoch": 9.46, "learning_rate": 3.6291336125467814e-05, "loss": 0.0524, "step": 9962, "task_loss": 0.040088847279548645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019988451153039932, "epoch": 9.46, "learning_rate": 3.628182855965676e-05, "loss": 0.025, "step": 9963, "task_loss": 0.07049809396266937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01588454097509384, "epoch": 9.46, "learning_rate": 3.627231894444721e-05, "loss": 0.0162, "step": 9964, "task_loss": 0.019161686301231384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07747796177864075, "epoch": 9.46, "learning_rate": 3.6262807281566634e-05, "loss": 0.0743, "step": 9965, "task_loss": 0.04522999748587608 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03719233348965645, "epoch": 9.46, "learning_rate": 3.6253293572742884e-05, "loss": 0.0341, "step": 9966, "task_loss": 0.006006931886076927 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022969527170062065, "epoch": 9.47, "learning_rate": 3.624377781970416e-05, "loss": 0.0218, "step": 9967, "task_loss": 0.011378584429621696 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02225591614842415, "epoch": 9.47, "learning_rate": 3.6234260024179033e-05, "loss": 0.0206, "step": 9968, "task_loss": 0.005647040903568268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015963654965162277, "epoch": 9.47, "learning_rate": 3.622474018789648e-05, "loss": 0.0153, "step": 9969, "task_loss": 0.009000055491924286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012255464680492878, "epoch": 9.47, "learning_rate": 3.62152183125858e-05, "loss": 0.0114, "step": 9970, "task_loss": 0.0038295499980449677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02565714716911316, "epoch": 9.47, "learning_rate": 3.620569439997671e-05, "loss": 0.0497, "step": 9971, "task_loss": 0.2665744721889496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.033734291791915894, "epoch": 9.47, "learning_rate": 3.6196168451799266e-05, "loss": 0.0376, "step": 9972, "task_loss": 0.07192590832710266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019625328481197357, "epoch": 9.47, "learning_rate": 3.618664046978389e-05, "loss": 0.0229, "step": 9973, "task_loss": 0.052226584404706955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07318352907896042, "epoch": 9.47, "learning_rate": 3.617711045566141e-05, "loss": 0.0663, "step": 9974, "task_loss": 0.004145057871937752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10104160010814667, "epoch": 9.47, "learning_rate": 3.616757841116298e-05, "loss": 0.0914, "step": 9975, "task_loss": 0.0048540495336055756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0499558225274086, "epoch": 9.47, "learning_rate": 3.6158044338020155e-05, "loss": 0.0594, "step": 9976, "task_loss": 0.1440429389476776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018780533224344254, "epoch": 9.47, "learning_rate": 3.614850823796483e-05, "loss": 0.0174, "step": 9977, "task_loss": 0.0051146019250154495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021981973201036453, "epoch": 9.48, "learning_rate": 3.6138970112729296e-05, "loss": 0.0208, "step": 9978, "task_loss": 0.010115953162312508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013006994500756264, "epoch": 9.48, "learning_rate": 3.612942996404619e-05, "loss": 0.0122, "step": 9979, "task_loss": 0.0053765904158353806 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04389118403196335, "epoch": 9.48, "learning_rate": 3.611988779364853e-05, "loss": 0.0429, "step": 9980, "task_loss": 0.034226249903440475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.062232695519924164, "epoch": 9.48, "learning_rate": 3.611034360326971e-05, "loss": 0.0623, "step": 9981, "task_loss": 0.062431663274765015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08030220866203308, "epoch": 9.48, "learning_rate": 3.6100797394643455e-05, "loss": 0.0799, "step": 9982, "task_loss": 0.07579746097326279 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09911078959703445, "epoch": 9.48, "learning_rate": 3.60912491695039e-05, "loss": 0.0947, "step": 9983, "task_loss": 0.054534316062927246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018304534256458282, "epoch": 9.48, "learning_rate": 3.608169892958551e-05, "loss": 0.0293, "step": 9984, "task_loss": 0.1278255134820938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0267410259693861, "epoch": 9.48, "learning_rate": 3.607214667662314e-05, "loss": 0.0337, "step": 9985, "task_loss": 0.09607288241386414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012940805405378342, "epoch": 9.48, "learning_rate": 3.606259241235201e-05, "loss": 0.0164, "step": 9986, "task_loss": 0.04748379439115524 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05529266595840454, "epoch": 9.48, "learning_rate": 3.605303613850768e-05, "loss": 0.0547, "step": 9987, "task_loss": 0.048941053450107574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0491475947201252, "epoch": 9.49, "learning_rate": 3.604347785682611e-05, "loss": 0.0475, "step": 9988, "task_loss": 0.03230959177017212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.042645324021577835, "epoch": 9.49, "learning_rate": 3.60339175690436e-05, "loss": 0.0437, "step": 9989, "task_loss": 0.0532299280166626 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06176585331559181, "epoch": 9.49, "learning_rate": 3.602435527689683e-05, "loss": 0.0598, "step": 9990, "task_loss": 0.04228825867176056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015652582049369812, "epoch": 9.49, "learning_rate": 3.6014790982122816e-05, "loss": 0.024, "step": 9991, "task_loss": 0.09889158606529236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04341176524758339, "epoch": 9.49, "learning_rate": 3.6005224686458985e-05, "loss": 0.0519, "step": 9992, "task_loss": 0.1284147948026657 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04502232372760773, "epoch": 9.49, "learning_rate": 3.599565639164308e-05, "loss": 0.0422, "step": 9993, "task_loss": 0.01630707085132599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029705507680773735, "epoch": 9.49, "learning_rate": 3.5986086099413234e-05, "loss": 0.0288, "step": 9994, "task_loss": 0.020898183807730675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03598994016647339, "epoch": 9.49, "learning_rate": 3.597651381150795e-05, "loss": 0.034, "step": 9995, "task_loss": 0.015627581626176834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03647323325276375, "epoch": 9.49, "learning_rate": 3.5966939529666056e-05, "loss": 0.0337, "step": 9996, "task_loss": 0.008715398609638214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022679002955555916, "epoch": 9.49, "learning_rate": 3.595736325562679e-05, "loss": 0.0215, "step": 9997, "task_loss": 0.011273723095655441 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04891244322061539, "epoch": 9.49, "learning_rate": 3.5947784991129716e-05, "loss": 0.0549, "step": 9998, "task_loss": 0.10830025374889374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023395076394081116, "epoch": 9.5, "learning_rate": 3.593820473791476e-05, "loss": 0.0217, "step": 9999, "task_loss": 0.006120296195149422 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012700533494353294, "epoch": 9.5, "learning_rate": 3.5928622497722245e-05, "loss": 0.0125, "step": 10000, "task_loss": 0.010491423308849335 }, { "epoch": 9.5, "eval_accuracy": 0.8944954128440367, "eval_loss": 0.4492689073085785, "eval_runtime": 18.1708, "eval_samples_per_second": 47.989, "eval_steps_per_second": 5.999, "step": 10000 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07806367427110672, "epoch": 9.5, "learning_rate": 3.591903827229282e-05, "loss": 0.0813, "step": 10001, "task_loss": 0.11088013648986816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012855404987931252, "epoch": 9.5, "learning_rate": 3.590945206336751e-05, "loss": 0.0119, "step": 10002, "task_loss": 0.002988189458847046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06485391408205032, "epoch": 9.5, "learning_rate": 3.5899863872687675e-05, "loss": 0.0683, "step": 10003, "task_loss": 0.09926420450210571 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02057585120201111, "epoch": 9.5, "learning_rate": 3.58902737019951e-05, "loss": 0.0203, "step": 10004, "task_loss": 0.018218940123915672 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03241540864109993, "epoch": 9.5, "learning_rate": 3.5880681553031835e-05, "loss": 0.0318, "step": 10005, "task_loss": 0.026616670191287994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01615779660642147, "epoch": 9.5, "learning_rate": 3.5871087427540375e-05, "loss": 0.0153, "step": 10006, "task_loss": 0.007706159725785255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03969818726181984, "epoch": 9.5, "learning_rate": 3.586149132726353e-05, "loss": 0.0375, "step": 10007, "task_loss": 0.01816389709711075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017227375879883766, "epoch": 9.5, "learning_rate": 3.585189325394447e-05, "loss": 0.0161, "step": 10008, "task_loss": 0.006397966295480728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01556732039898634, "epoch": 9.51, "learning_rate": 3.5842293209326746e-05, "loss": 0.0145, "step": 10009, "task_loss": 0.00453593023121357 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012093789875507355, "epoch": 9.51, "learning_rate": 3.583269119515423e-05, "loss": 0.0182, "step": 10010, "task_loss": 0.0733606368303299 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029773391783237457, "epoch": 9.51, "learning_rate": 3.58230872131712e-05, "loss": 0.0276, "step": 10011, "task_loss": 0.00802118144929409 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015580402687191963, "epoch": 9.51, "learning_rate": 3.581348126512225e-05, "loss": 0.0147, "step": 10012, "task_loss": 0.007100388407707214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01875162497162819, "epoch": 9.51, "learning_rate": 3.5803873352752343e-05, "loss": 0.0177, "step": 10013, "task_loss": 0.00872032716870308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07013194262981415, "epoch": 9.51, "learning_rate": 3.5794263477806816e-05, "loss": 0.074, "step": 10014, "task_loss": 0.10908061265945435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023847414180636406, "epoch": 9.51, "learning_rate": 3.578465164203134e-05, "loss": 0.0376, "step": 10015, "task_loss": 0.16116636991500854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010446256026625633, "epoch": 9.51, "learning_rate": 3.577503784717195e-05, "loss": 0.0099, "step": 10016, "task_loss": 0.005132999271154404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.046971529722213745, "epoch": 9.51, "learning_rate": 3.576542209497505e-05, "loss": 0.0496, "step": 10017, "task_loss": 0.0734739899635315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020745940506458282, "epoch": 9.51, "learning_rate": 3.575580438718738e-05, "loss": 0.0244, "step": 10018, "task_loss": 0.057412195950746536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017170149832963943, "epoch": 9.51, "learning_rate": 3.574618472555604e-05, "loss": 0.0273, "step": 10019, "task_loss": 0.11874374747276306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018801424652338028, "epoch": 9.52, "learning_rate": 3.573656311182848e-05, "loss": 0.0281, "step": 10020, "task_loss": 0.11174070090055466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013433647342026234, "epoch": 9.52, "learning_rate": 3.5726939547752536e-05, "loss": 0.0126, "step": 10021, "task_loss": 0.005174616351723671 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010112293064594269, "epoch": 9.52, "learning_rate": 3.5717314035076355e-05, "loss": 0.0095, "step": 10022, "task_loss": 0.003835149109363556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041021257638931274, "epoch": 9.52, "learning_rate": 3.570768657554847e-05, "loss": 0.0591, "step": 10023, "task_loss": 0.22215372323989868 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03894636034965515, "epoch": 9.52, "learning_rate": 3.569805717091775e-05, "loss": 0.0423, "step": 10024, "task_loss": 0.07290509343147278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019539494067430496, "epoch": 9.52, "learning_rate": 3.5688425822933414e-05, "loss": 0.0342, "step": 10025, "task_loss": 0.1657709777355194 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03588699549436569, "epoch": 9.52, "learning_rate": 3.5678792533345055e-05, "loss": 0.0434, "step": 10026, "task_loss": 0.111113041639328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.058800891041755676, "epoch": 9.52, "learning_rate": 3.5669157303902604e-05, "loss": 0.0656, "step": 10027, "task_loss": 0.12722201645374298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02307158336043358, "epoch": 9.52, "learning_rate": 3.565952013635635e-05, "loss": 0.0256, "step": 10028, "task_loss": 0.04787431284785271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012087054550647736, "epoch": 9.52, "learning_rate": 3.564988103245692e-05, "loss": 0.0137, "step": 10029, "task_loss": 0.028024591505527496 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014830454252660275, "epoch": 9.53, "learning_rate": 3.564023999395531e-05, "loss": 0.0216, "step": 10030, "task_loss": 0.08278335630893707 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020527433604002, "epoch": 9.53, "learning_rate": 3.563059702260287e-05, "loss": 0.0193, "step": 10031, "task_loss": 0.00791563093662262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08408955484628677, "epoch": 9.53, "learning_rate": 3.562095212015128e-05, "loss": 0.0893, "step": 10032, "task_loss": 0.13649523258209229 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0584905743598938, "epoch": 9.53, "learning_rate": 3.5611305288352576e-05, "loss": 0.0576, "step": 10033, "task_loss": 0.04989843815565109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09185069054365158, "epoch": 9.53, "learning_rate": 3.560165652895917e-05, "loss": 0.0895, "step": 10034, "task_loss": 0.0679357573390007 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05496165156364441, "epoch": 9.53, "learning_rate": 3.5592005843723794e-05, "loss": 0.06, "step": 10035, "task_loss": 0.10506260395050049 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02257445454597473, "epoch": 9.53, "learning_rate": 3.558235323439955e-05, "loss": 0.0208, "step": 10036, "task_loss": 0.004368903115391731 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03320860490202904, "epoch": 9.53, "learning_rate": 3.557269870273987e-05, "loss": 0.0343, "step": 10037, "task_loss": 0.04424959793686867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03928327187895775, "epoch": 9.53, "learning_rate": 3.5563042250498556e-05, "loss": 0.0388, "step": 10038, "task_loss": 0.034501951187849045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06444701552391052, "epoch": 9.53, "learning_rate": 3.555338387942974e-05, "loss": 0.0616, "step": 10039, "task_loss": 0.03561048209667206 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05409969761967659, "epoch": 9.53, "learning_rate": 3.5543723591287916e-05, "loss": 0.0558, "step": 10040, "task_loss": 0.07060417532920837 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09123320877552032, "epoch": 9.54, "learning_rate": 3.5534061387827936e-05, "loss": 0.1023, "step": 10041, "task_loss": 0.202234148979187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014315794222056866, "epoch": 9.54, "learning_rate": 3.552439727080495e-05, "loss": 0.0223, "step": 10042, "task_loss": 0.09408995509147644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01862926036119461, "epoch": 9.54, "learning_rate": 3.5514731241974544e-05, "loss": 0.0183, "step": 10043, "task_loss": 0.015779605135321617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026581861078739166, "epoch": 9.54, "learning_rate": 3.5505063303092545e-05, "loss": 0.0252, "step": 10044, "task_loss": 0.012902749702334404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020298294723033905, "epoch": 9.54, "learning_rate": 3.549539345591521e-05, "loss": 0.0195, "step": 10045, "task_loss": 0.012040220201015472 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09082924574613571, "epoch": 9.54, "learning_rate": 3.5485721702199104e-05, "loss": 0.0905, "step": 10046, "task_loss": 0.0875604897737503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04488201439380646, "epoch": 9.54, "learning_rate": 3.547604804370116e-05, "loss": 0.0489, "step": 10047, "task_loss": 0.08512827754020691 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024493440985679626, "epoch": 9.54, "learning_rate": 3.5466372482178635e-05, "loss": 0.0231, "step": 10048, "task_loss": 0.010883405804634094 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03374480828642845, "epoch": 9.54, "learning_rate": 3.545669501938913e-05, "loss": 0.031, "step": 10049, "task_loss": 0.005990535020828247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022520992904901505, "epoch": 9.54, "learning_rate": 3.544701565709063e-05, "loss": 0.0242, "step": 10050, "task_loss": 0.03893338143825531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00832436978816986, "epoch": 9.55, "learning_rate": 3.54373343970414e-05, "loss": 0.0157, "step": 10051, "task_loss": 0.08178244531154633 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0154949389398098, "epoch": 9.55, "learning_rate": 3.542765124100014e-05, "loss": 0.0143, "step": 10052, "task_loss": 0.0035653971135616302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01720263808965683, "epoch": 9.55, "learning_rate": 3.541796619072579e-05, "loss": 0.0175, "step": 10053, "task_loss": 0.020343858748674393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12314651161432266, "epoch": 9.55, "learning_rate": 3.540827924797772e-05, "loss": 0.1117, "step": 10054, "task_loss": 0.00850660353899002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011486424133181572, "epoch": 9.55, "learning_rate": 3.5398590414515586e-05, "loss": 0.0106, "step": 10055, "task_loss": 0.002300182357430458 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.006632357835769653, "epoch": 9.55, "learning_rate": 3.5388899692099433e-05, "loss": 0.0235, "step": 10056, "task_loss": 0.17546282708644867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015405992977321148, "epoch": 9.55, "learning_rate": 3.537920708248961e-05, "loss": 0.0295, "step": 10057, "task_loss": 0.15601424872875214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06356394290924072, "epoch": 9.55, "learning_rate": 3.536951258744684e-05, "loss": 0.0721, "step": 10058, "task_loss": 0.1491929441690445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012521881610155106, "epoch": 9.55, "learning_rate": 3.5359816208732164e-05, "loss": 0.0118, "step": 10059, "task_loss": 0.005132080987095833 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022264419123530388, "epoch": 9.55, "learning_rate": 3.535011794810698e-05, "loss": 0.0216, "step": 10060, "task_loss": 0.015442634001374245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022136827930808067, "epoch": 9.55, "learning_rate": 3.5340417807333026e-05, "loss": 0.0227, "step": 10061, "task_loss": 0.02739689312875271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013634988106787205, "epoch": 9.56, "learning_rate": 3.533071578817239e-05, "loss": 0.0204, "step": 10062, "task_loss": 0.08093855530023575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012208834290504456, "epoch": 9.56, "learning_rate": 3.5321011892387455e-05, "loss": 0.0285, "step": 10063, "task_loss": 0.1753706932067871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07617907971143723, "epoch": 9.56, "learning_rate": 3.5311306121741015e-05, "loss": 0.0732, "step": 10064, "task_loss": 0.046028852462768555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010005553252995014, "epoch": 9.56, "learning_rate": 3.530159847799616e-05, "loss": 0.0097, "step": 10065, "task_loss": 0.0069500841200351715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0498456135392189, "epoch": 9.56, "learning_rate": 3.529188896291632e-05, "loss": 0.0624, "step": 10066, "task_loss": 0.17580851912498474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01079606544226408, "epoch": 9.56, "learning_rate": 3.5282177578265296e-05, "loss": 0.01, "step": 10067, "task_loss": 0.003003546968102455 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06311812996864319, "epoch": 9.56, "learning_rate": 3.527246432580718e-05, "loss": 0.0622, "step": 10068, "task_loss": 0.05363881587982178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020096108317375183, "epoch": 9.56, "learning_rate": 3.526274920730645e-05, "loss": 0.0185, "step": 10069, "task_loss": 0.004027027636766434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016499292105436325, "epoch": 9.56, "learning_rate": 3.525303222452791e-05, "loss": 0.0156, "step": 10070, "task_loss": 0.007455941289663315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014504550956189632, "epoch": 9.56, "learning_rate": 3.5243313379236685e-05, "loss": 0.0182, "step": 10071, "task_loss": 0.051125288009643555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028046708554029465, "epoch": 9.57, "learning_rate": 3.5233592673198245e-05, "loss": 0.037, "step": 10072, "task_loss": 0.11712638288736343 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.048199012875556946, "epoch": 9.57, "learning_rate": 3.522387010817842e-05, "loss": 0.0448, "step": 10073, "task_loss": 0.013714082539081573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.12250658124685287, "epoch": 9.57, "learning_rate": 3.521414568594335e-05, "loss": 0.1211, "step": 10074, "task_loss": 0.10803371667861938 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028487298637628555, "epoch": 9.57, "learning_rate": 3.520441940825952e-05, "loss": 0.0274, "step": 10075, "task_loss": 0.017971640452742577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01495126262307167, "epoch": 9.57, "learning_rate": 3.5194691276893755e-05, "loss": 0.0234, "step": 10076, "task_loss": 0.09949477761983871 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018718678504228592, "epoch": 9.57, "learning_rate": 3.518496129361323e-05, "loss": 0.0328, "step": 10077, "task_loss": 0.1593606173992157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0187015850096941, "epoch": 9.57, "learning_rate": 3.5175229460185425e-05, "loss": 0.0173, "step": 10078, "task_loss": 0.004929153248667717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02536281757056713, "epoch": 9.57, "learning_rate": 3.5165495778378196e-05, "loss": 0.0232, "step": 10079, "task_loss": 0.00400426983833313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0379653237760067, "epoch": 9.57, "learning_rate": 3.51557602499597e-05, "loss": 0.0348, "step": 10080, "task_loss": 0.006340883672237396 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047561101615428925, "epoch": 9.57, "learning_rate": 3.514602287669844e-05, "loss": 0.0578, "step": 10081, "task_loss": 0.15020343661308289 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017001332715153694, "epoch": 9.57, "learning_rate": 3.5136283660363255e-05, "loss": 0.0169, "step": 10082, "task_loss": 0.015564844012260437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025101391598582268, "epoch": 9.58, "learning_rate": 3.5126542602723334e-05, "loss": 0.0319, "step": 10083, "task_loss": 0.09269939363002777 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.049932364374399185, "epoch": 9.58, "learning_rate": 3.5116799705548175e-05, "loss": 0.0726, "step": 10084, "task_loss": 0.2766878008842468 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01646530255675316, "epoch": 9.58, "learning_rate": 3.510705497060762e-05, "loss": 0.0153, "step": 10085, "task_loss": 0.004672164097428322 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031244024634361267, "epoch": 9.58, "learning_rate": 3.509730839967187e-05, "loss": 0.0304, "step": 10086, "task_loss": 0.022954711690545082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0413852296769619, "epoch": 9.58, "learning_rate": 3.508755999451141e-05, "loss": 0.0503, "step": 10087, "task_loss": 0.13052834570407867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024242881685495377, "epoch": 9.58, "learning_rate": 3.50778097568971e-05, "loss": 0.0226, "step": 10088, "task_loss": 0.008248705416917801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014527438208460808, "epoch": 9.58, "learning_rate": 3.506805768860011e-05, "loss": 0.0136, "step": 10089, "task_loss": 0.005055870860815048 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043073639273643494, "epoch": 9.58, "learning_rate": 3.505830379139195e-05, "loss": 0.0481, "step": 10090, "task_loss": 0.09338083863258362 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018400005996227264, "epoch": 9.58, "learning_rate": 3.504854806704446e-05, "loss": 0.017, "step": 10091, "task_loss": 0.004409752786159515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03231433779001236, "epoch": 9.58, "learning_rate": 3.503879051732983e-05, "loss": 0.0422, "step": 10092, "task_loss": 0.1311528980731964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04105234518647194, "epoch": 9.58, "learning_rate": 3.502903114402055e-05, "loss": 0.0461, "step": 10093, "task_loss": 0.09118282049894333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04761913791298866, "epoch": 9.59, "learning_rate": 3.501926994888946e-05, "loss": 0.0462, "step": 10094, "task_loss": 0.0329703614115715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013278797268867493, "epoch": 9.59, "learning_rate": 3.500950693370974e-05, "loss": 0.0124, "step": 10095, "task_loss": 0.004137454554438591 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021566402167081833, "epoch": 9.59, "learning_rate": 3.499974210025487e-05, "loss": 0.0255, "step": 10096, "task_loss": 0.06101330369710922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03355410322546959, "epoch": 9.59, "learning_rate": 3.4989975450298694e-05, "loss": 0.0436, "step": 10097, "task_loss": 0.13387075066566467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011760689318180084, "epoch": 9.59, "learning_rate": 3.498020698561536e-05, "loss": 0.0109, "step": 10098, "task_loss": 0.0035722479224205017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.040274716913700104, "epoch": 9.59, "learning_rate": 3.497043670797936e-05, "loss": 0.0377, "step": 10099, "task_loss": 0.014515921473503113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019877292215824127, "epoch": 9.59, "learning_rate": 3.496066461916552e-05, "loss": 0.0187, "step": 10100, "task_loss": 0.0076863933354616165 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05636334791779518, "epoch": 9.59, "learning_rate": 3.495089072094898e-05, "loss": 0.0547, "step": 10101, "task_loss": 0.040151506662368774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02113344892859459, "epoch": 9.59, "learning_rate": 3.494111501510522e-05, "loss": 0.0195, "step": 10102, "task_loss": 0.0052163973450660706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015629485249519348, "epoch": 9.59, "learning_rate": 3.4931337503410034e-05, "loss": 0.0238, "step": 10103, "task_loss": 0.09734653681516647 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013077488169074059, "epoch": 9.6, "learning_rate": 3.4921558187639556e-05, "loss": 0.0125, "step": 10104, "task_loss": 0.006894918158650398 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014943759888410568, "epoch": 9.6, "learning_rate": 3.491177706957026e-05, "loss": 0.0193, "step": 10105, "task_loss": 0.058562587946653366 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05631723254919052, "epoch": 9.6, "learning_rate": 3.490199415097892e-05, "loss": 0.0563, "step": 10106, "task_loss": 0.056095875799655914 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017733752727508545, "epoch": 9.6, "learning_rate": 3.489220943364266e-05, "loss": 0.0165, "step": 10107, "task_loss": 0.00526992604136467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10405229032039642, "epoch": 9.6, "learning_rate": 3.488242291933891e-05, "loss": 0.1191, "step": 10108, "task_loss": 0.254643976688385 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008884737268090248, "epoch": 9.6, "learning_rate": 3.487263460984544e-05, "loss": 0.0083, "step": 10109, "task_loss": 0.0033239684998989105 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00749258091673255, "epoch": 9.6, "learning_rate": 3.486284450694035e-05, "loss": 0.007, "step": 10110, "task_loss": 0.0027102380990982056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.046791236847639084, "epoch": 9.6, "learning_rate": 3.485305261240205e-05, "loss": 0.0484, "step": 10111, "task_loss": 0.06307273358106613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026928380131721497, "epoch": 9.6, "learning_rate": 3.4843258928009294e-05, "loss": 0.0259, "step": 10112, "task_loss": 0.01632116362452507 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030285503715276718, "epoch": 9.6, "learning_rate": 3.4833463455541146e-05, "loss": 0.0344, "step": 10113, "task_loss": 0.07147219032049179 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027739666402339935, "epoch": 9.6, "learning_rate": 3.4823666196777006e-05, "loss": 0.026, "step": 10114, "task_loss": 0.01077285036444664 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1198190227150917, "epoch": 9.61, "learning_rate": 3.481386715349659e-05, "loss": 0.119, "step": 10115, "task_loss": 0.11184495687484741 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02876965142786503, "epoch": 9.61, "learning_rate": 3.4804066327479934e-05, "loss": 0.0307, "step": 10116, "task_loss": 0.04767701029777527 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06963972747325897, "epoch": 9.61, "learning_rate": 3.4794263720507427e-05, "loss": 0.0748, "step": 10117, "task_loss": 0.12130016088485718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10158136487007141, "epoch": 9.61, "learning_rate": 3.478445933435973e-05, "loss": 0.0977, "step": 10118, "task_loss": 0.06303709745407104 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06906269490718842, "epoch": 9.61, "learning_rate": 3.4774653170817884e-05, "loss": 0.0782, "step": 10119, "task_loss": 0.1606631875038147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020416613668203354, "epoch": 9.61, "learning_rate": 3.4764845231663205e-05, "loss": 0.0258, "step": 10120, "task_loss": 0.07442939281463623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014320943504571915, "epoch": 9.61, "learning_rate": 3.475503551867737e-05, "loss": 0.0134, "step": 10121, "task_loss": 0.004721490666270256 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04403253272175789, "epoch": 9.61, "learning_rate": 3.474522403364235e-05, "loss": 0.0434, "step": 10122, "task_loss": 0.03757096827030182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009927413426339626, "epoch": 9.61, "learning_rate": 3.473541077834045e-05, "loss": 0.0096, "step": 10123, "task_loss": 0.0061761606484651566 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.046677879989147186, "epoch": 9.61, "learning_rate": 3.4725595754554295e-05, "loss": 0.0439, "step": 10124, "task_loss": 0.019169652834534645 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01028907299041748, "epoch": 9.62, "learning_rate": 3.471577896406683e-05, "loss": 0.0096, "step": 10125, "task_loss": 0.0030922014266252518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022475387901067734, "epoch": 9.62, "learning_rate": 3.470596040866133e-05, "loss": 0.0296, "step": 10126, "task_loss": 0.09410066902637482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010681994259357452, "epoch": 9.62, "learning_rate": 3.4696140090121376e-05, "loss": 0.0101, "step": 10127, "task_loss": 0.004838347434997559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011722270399332047, "epoch": 9.62, "learning_rate": 3.468631801023088e-05, "loss": 0.0287, "step": 10128, "task_loss": 0.18153433501720428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022453030571341515, "epoch": 9.62, "learning_rate": 3.467649417077406e-05, "loss": 0.0258, "step": 10129, "task_loss": 0.05557314306497574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08180783689022064, "epoch": 9.62, "learning_rate": 3.466666857353547e-05, "loss": 0.0942, "step": 10130, "task_loss": 0.20586249232292175 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017414044588804245, "epoch": 9.62, "learning_rate": 3.465684122029999e-05, "loss": 0.0161, "step": 10131, "task_loss": 0.004198454320430756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.23380213975906372, "epoch": 9.62, "learning_rate": 3.464701211285279e-05, "loss": 0.2391, "step": 10132, "task_loss": 0.2872486114501953 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013667404651641846, "epoch": 9.62, "learning_rate": 3.463718125297937e-05, "loss": 0.0128, "step": 10133, "task_loss": 0.004925645887851715 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05431966111063957, "epoch": 9.62, "learning_rate": 3.462734864246557e-05, "loss": 0.0529, "step": 10134, "task_loss": 0.04007718339562416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030022673308849335, "epoch": 9.62, "learning_rate": 3.4617514283097524e-05, "loss": 0.0402, "step": 10135, "task_loss": 0.13180501759052277 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019991595298051834, "epoch": 9.63, "learning_rate": 3.4607678176661695e-05, "loss": 0.0377, "step": 10136, "task_loss": 0.19743065536022186 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04344207048416138, "epoch": 9.63, "learning_rate": 3.459784032494484e-05, "loss": 0.0485, "step": 10137, "task_loss": 0.09402826428413391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007493377663195133, "epoch": 9.63, "learning_rate": 3.458800072973408e-05, "loss": 0.0071, "step": 10138, "task_loss": 0.0033276528120040894 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08084356784820557, "epoch": 9.63, "learning_rate": 3.45781593928168e-05, "loss": 0.0823, "step": 10139, "task_loss": 0.09574754536151886 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01496205572038889, "epoch": 9.63, "learning_rate": 3.4568316315980745e-05, "loss": 0.0204, "step": 10140, "task_loss": 0.06947852671146393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024571670219302177, "epoch": 9.63, "learning_rate": 3.455847150101395e-05, "loss": 0.0364, "step": 10141, "task_loss": 0.14293412864208221 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03470940142869949, "epoch": 9.63, "learning_rate": 3.454862494970477e-05, "loss": 0.0461, "step": 10142, "task_loss": 0.1487175077199936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014040854759514332, "epoch": 9.63, "learning_rate": 3.4538776663841875e-05, "loss": 0.0133, "step": 10143, "task_loss": 0.006179919466376305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02388843148946762, "epoch": 9.63, "learning_rate": 3.452892664521427e-05, "loss": 0.0248, "step": 10144, "task_loss": 0.032703425735235214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007051503751426935, "epoch": 9.63, "learning_rate": 3.4519074895611244e-05, "loss": 0.0067, "step": 10145, "task_loss": 0.004012970253825188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013478701002895832, "epoch": 9.64, "learning_rate": 3.4509221416822415e-05, "loss": 0.0126, "step": 10146, "task_loss": 0.004567300900816917 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044648945331573486, "epoch": 9.64, "learning_rate": 3.4499366210637725e-05, "loss": 0.0537, "step": 10147, "task_loss": 0.13514164090156555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017728157341480255, "epoch": 9.64, "learning_rate": 3.4489509278847414e-05, "loss": 0.024, "step": 10148, "task_loss": 0.08038611710071564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0526009202003479, "epoch": 9.64, "learning_rate": 3.4479650623242036e-05, "loss": 0.0529, "step": 10149, "task_loss": 0.05552627891302109 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031424228101968765, "epoch": 9.64, "learning_rate": 3.446979024561246e-05, "loss": 0.0368, "step": 10150, "task_loss": 0.0853959321975708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08394592255353928, "epoch": 9.64, "learning_rate": 3.44599281477499e-05, "loss": 0.08, "step": 10151, "task_loss": 0.044918350875377655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06996019929647446, "epoch": 9.64, "learning_rate": 3.4450064331445814e-05, "loss": 0.0865, "step": 10152, "task_loss": 0.23547132313251495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.104726642370224, "epoch": 9.64, "learning_rate": 3.444019879849204e-05, "loss": 0.1095, "step": 10153, "task_loss": 0.15200763940811157 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06347490102052689, "epoch": 9.64, "learning_rate": 3.443033155068069e-05, "loss": 0.0633, "step": 10154, "task_loss": 0.06222536787390709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10413339734077454, "epoch": 9.64, "learning_rate": 3.442046258980419e-05, "loss": 0.1025, "step": 10155, "task_loss": 0.08788105845451355 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04322014003992081, "epoch": 9.64, "learning_rate": 3.4410591917655296e-05, "loss": 0.0468, "step": 10156, "task_loss": 0.07900369167327881 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051760174334049225, "epoch": 9.65, "learning_rate": 3.4400719536027056e-05, "loss": 0.0559, "step": 10157, "task_loss": 0.09334629774093628 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020617887377738953, "epoch": 9.65, "learning_rate": 3.4390845446712836e-05, "loss": 0.0218, "step": 10158, "task_loss": 0.032847531139850616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.040341269224882126, "epoch": 9.65, "learning_rate": 3.438096965150632e-05, "loss": 0.0382, "step": 10159, "task_loss": 0.018778573721647263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01803889125585556, "epoch": 9.65, "learning_rate": 3.4371092152201485e-05, "loss": 0.0195, "step": 10160, "task_loss": 0.03258706256747246 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05206110328435898, "epoch": 9.65, "learning_rate": 3.4361212950592624e-05, "loss": 0.0492, "step": 10161, "task_loss": 0.023629793897271156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011896253563463688, "epoch": 9.65, "learning_rate": 3.435133204847435e-05, "loss": 0.0111, "step": 10162, "task_loss": 0.00429350882768631 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019481834024190903, "epoch": 9.65, "learning_rate": 3.4341449447641575e-05, "loss": 0.0182, "step": 10163, "task_loss": 0.006555115804076195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044923312962055206, "epoch": 9.65, "learning_rate": 3.433156514988951e-05, "loss": 0.0486, "step": 10164, "task_loss": 0.0811932161450386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026373110711574554, "epoch": 9.65, "learning_rate": 3.432167915701371e-05, "loss": 0.0293, "step": 10165, "task_loss": 0.05567855015397072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015387404710054398, "epoch": 9.65, "learning_rate": 3.431179147080999e-05, "loss": 0.0244, "step": 10166, "task_loss": 0.10537419468164444 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05314071476459503, "epoch": 9.66, "learning_rate": 3.4301902093074504e-05, "loss": 0.0508, "step": 10167, "task_loss": 0.030011305585503578 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020540203899145126, "epoch": 9.66, "learning_rate": 3.42920110256037e-05, "loss": 0.0278, "step": 10168, "task_loss": 0.09346860647201538 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05021016672253609, "epoch": 9.66, "learning_rate": 3.428211827019434e-05, "loss": 0.0555, "step": 10169, "task_loss": 0.10355982184410095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00913775060325861, "epoch": 9.66, "learning_rate": 3.42722238286435e-05, "loss": 0.0171, "step": 10170, "task_loss": 0.08899959921836853 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03822232410311699, "epoch": 9.66, "learning_rate": 3.426232770274855e-05, "loss": 0.04, "step": 10171, "task_loss": 0.05588465929031372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010778695344924927, "epoch": 9.66, "learning_rate": 3.4252429894307154e-05, "loss": 0.0185, "step": 10172, "task_loss": 0.08806253969669342 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025916989892721176, "epoch": 9.66, "learning_rate": 3.424253040511731e-05, "loss": 0.0386, "step": 10173, "task_loss": 0.1526973396539688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015541073866188526, "epoch": 9.66, "learning_rate": 3.4232629236977316e-05, "loss": 0.0146, "step": 10174, "task_loss": 0.006028560921549797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0131698502227664, "epoch": 9.66, "learning_rate": 3.4222726391685746e-05, "loss": 0.0269, "step": 10175, "task_loss": 0.15060535073280334 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01671590656042099, "epoch": 9.66, "learning_rate": 3.42128218710415e-05, "loss": 0.019, "step": 10176, "task_loss": 0.039641670882701874 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009115570224821568, "epoch": 9.66, "learning_rate": 3.420291567684381e-05, "loss": 0.0088, "step": 10177, "task_loss": 0.00632680207490921 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08051498234272003, "epoch": 9.67, "learning_rate": 3.419300781089216e-05, "loss": 0.0786, "step": 10178, "task_loss": 0.061485860496759415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03897204250097275, "epoch": 9.67, "learning_rate": 3.418309827498637e-05, "loss": 0.0359, "step": 10179, "task_loss": 0.008243357762694359 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011700714007019997, "epoch": 9.67, "learning_rate": 3.4173187070926546e-05, "loss": 0.0111, "step": 10180, "task_loss": 0.005291668698191643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012526036240160465, "epoch": 9.67, "learning_rate": 3.4163274200513116e-05, "loss": 0.0186, "step": 10181, "task_loss": 0.07343530654907227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017924156039953232, "epoch": 9.67, "learning_rate": 3.415335966554679e-05, "loss": 0.0179, "step": 10182, "task_loss": 0.017576100304722786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06711853295564651, "epoch": 9.67, "learning_rate": 3.414344346782861e-05, "loss": 0.0728, "step": 10183, "task_loss": 0.12400171160697937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017617417499423027, "epoch": 9.67, "learning_rate": 3.413352560915988e-05, "loss": 0.0164, "step": 10184, "task_loss": 0.005933165550231934 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04335098713636398, "epoch": 9.67, "learning_rate": 3.412360609134223e-05, "loss": 0.0492, "step": 10185, "task_loss": 0.10230584442615509 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01754794456064701, "epoch": 9.67, "learning_rate": 3.411368491617761e-05, "loss": 0.0165, "step": 10186, "task_loss": 0.006676128134131432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08341458439826965, "epoch": 9.67, "learning_rate": 3.410376208546822e-05, "loss": 0.0941, "step": 10187, "task_loss": 0.19037111103534698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012632082216441631, "epoch": 9.68, "learning_rate": 3.409383760101661e-05, "loss": 0.0321, "step": 10188, "task_loss": 0.20703136920928955 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05429381877183914, "epoch": 9.68, "learning_rate": 3.4083911464625596e-05, "loss": 0.0664, "step": 10189, "task_loss": 0.17530539631843567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01827721670269966, "epoch": 9.68, "learning_rate": 3.407398367809832e-05, "loss": 0.0346, "step": 10190, "task_loss": 0.18168266117572784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015146693214774132, "epoch": 9.68, "learning_rate": 3.406405424323821e-05, "loss": 0.0274, "step": 10191, "task_loss": 0.13719764351844788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05147261545062065, "epoch": 9.68, "learning_rate": 3.4054123161848995e-05, "loss": 0.059, "step": 10192, "task_loss": 0.12712138891220093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024947544559836388, "epoch": 9.68, "learning_rate": 3.4044190435734695e-05, "loss": 0.0233, "step": 10193, "task_loss": 0.008586343377828598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010366448201239109, "epoch": 9.68, "learning_rate": 3.403425606669965e-05, "loss": 0.0099, "step": 10194, "task_loss": 0.005786292254924774 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08639805763959885, "epoch": 9.68, "learning_rate": 3.4024320056548475e-05, "loss": 0.0934, "step": 10195, "task_loss": 0.15593896806240082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04102171212434769, "epoch": 9.68, "learning_rate": 3.401438240708611e-05, "loss": 0.0455, "step": 10196, "task_loss": 0.08535470068454742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02099025435745716, "epoch": 9.68, "learning_rate": 3.400444312011776e-05, "loss": 0.0239, "step": 10197, "task_loss": 0.049628641456365585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07851079851388931, "epoch": 9.68, "learning_rate": 3.399450219744896e-05, "loss": 0.0751, "step": 10198, "task_loss": 0.044152356684207916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039639078080654144, "epoch": 9.69, "learning_rate": 3.3984559640885505e-05, "loss": 0.0609, "step": 10199, "task_loss": 0.2518640458583832 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03628784418106079, "epoch": 9.69, "learning_rate": 3.3974615452233526e-05, "loss": 0.0336, "step": 10200, "task_loss": 0.009241720661520958 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07165795564651489, "epoch": 9.69, "learning_rate": 3.396466963329944e-05, "loss": 0.071, "step": 10201, "task_loss": 0.06518401205539703 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026513490825891495, "epoch": 9.69, "learning_rate": 3.395472218588992e-05, "loss": 0.0297, "step": 10202, "task_loss": 0.05858299508690834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023471450433135033, "epoch": 9.69, "learning_rate": 3.394477311181201e-05, "loss": 0.0217, "step": 10203, "task_loss": 0.005670515820384026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08482164889574051, "epoch": 9.69, "learning_rate": 3.393482241287297e-05, "loss": 0.0974, "step": 10204, "task_loss": 0.21033701300621033 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05313311517238617, "epoch": 9.69, "learning_rate": 3.392487009088041e-05, "loss": 0.0544, "step": 10205, "task_loss": 0.06595422327518463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03181048855185509, "epoch": 9.69, "learning_rate": 3.391491614764222e-05, "loss": 0.0369, "step": 10206, "task_loss": 0.08310259878635406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010201860219240189, "epoch": 9.69, "learning_rate": 3.390496058496657e-05, "loss": 0.018, "step": 10207, "task_loss": 0.0884314775466919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02600035071372986, "epoch": 9.69, "learning_rate": 3.3895003404661955e-05, "loss": 0.0239, "step": 10208, "task_loss": 0.005447834730148315 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04286601021885872, "epoch": 9.7, "learning_rate": 3.3885044608537125e-05, "loss": 0.0517, "step": 10209, "task_loss": 0.13154464960098267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0452398955821991, "epoch": 9.7, "learning_rate": 3.387508419840115e-05, "loss": 0.0499, "step": 10210, "task_loss": 0.0916098803281784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04269058629870415, "epoch": 9.7, "learning_rate": 3.386512217606339e-05, "loss": 0.0601, "step": 10211, "task_loss": 0.2167372852563858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06171311438083649, "epoch": 9.7, "learning_rate": 3.385515854333349e-05, "loss": 0.0662, "step": 10212, "task_loss": 0.10651151090860367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1391063630580902, "epoch": 9.7, "learning_rate": 3.38451933020214e-05, "loss": 0.135, "step": 10213, "task_loss": 0.09796366840600967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05202070623636246, "epoch": 9.7, "learning_rate": 3.383522645393734e-05, "loss": 0.0639, "step": 10214, "task_loss": 0.17111501097679138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04103817790746689, "epoch": 9.7, "learning_rate": 3.3825258000891846e-05, "loss": 0.0503, "step": 10215, "task_loss": 0.13386118412017822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026339948177337646, "epoch": 9.7, "learning_rate": 3.381528794469574e-05, "loss": 0.0335, "step": 10216, "task_loss": 0.09778992086648941 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009569020941853523, "epoch": 9.7, "learning_rate": 3.380531628716012e-05, "loss": 0.0091, "step": 10217, "task_loss": 0.004733014851808548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05844016373157501, "epoch": 9.7, "learning_rate": 3.3795343030096384e-05, "loss": 0.0596, "step": 10218, "task_loss": 0.06965689361095428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.061011701822280884, "epoch": 9.7, "learning_rate": 3.3785368175316226e-05, "loss": 0.0728, "step": 10219, "task_loss": 0.1784566342830658 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01999977044761181, "epoch": 9.71, "learning_rate": 3.377539172463164e-05, "loss": 0.0248, "step": 10220, "task_loss": 0.06758248805999756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019030677154660225, "epoch": 9.71, "learning_rate": 3.376541367985488e-05, "loss": 0.0196, "step": 10221, "task_loss": 0.02473231591284275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03495609387755394, "epoch": 9.71, "learning_rate": 3.3755434042798506e-05, "loss": 0.033, "step": 10222, "task_loss": 0.01556423120200634 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1749984323978424, "epoch": 9.71, "learning_rate": 3.374545281527538e-05, "loss": 0.1663, "step": 10223, "task_loss": 0.08834327012300491 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012353798374533653, "epoch": 9.71, "learning_rate": 3.3735469999098615e-05, "loss": 0.0178, "step": 10224, "task_loss": 0.06693007051944733 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08601576089859009, "epoch": 9.71, "learning_rate": 3.372548559608166e-05, "loss": 0.0848, "step": 10225, "task_loss": 0.07415040582418442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03538770601153374, "epoch": 9.71, "learning_rate": 3.3715499608038234e-05, "loss": 0.0404, "step": 10226, "task_loss": 0.08567916601896286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025410983711481094, "epoch": 9.71, "learning_rate": 3.370551203678233e-05, "loss": 0.0321, "step": 10227, "task_loss": 0.09199456870555878 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07486201822757721, "epoch": 9.71, "learning_rate": 3.369552288412822e-05, "loss": 0.083, "step": 10228, "task_loss": 0.15588508546352386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05014657974243164, "epoch": 9.71, "learning_rate": 3.368553215189052e-05, "loss": 0.0607, "step": 10229, "task_loss": 0.155786395072937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044199906289577484, "epoch": 9.72, "learning_rate": 3.367553984188407e-05, "loss": 0.0514, "step": 10230, "task_loss": 0.11591973900794983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02194777876138687, "epoch": 9.72, "learning_rate": 3.366554595592402e-05, "loss": 0.0213, "step": 10231, "task_loss": 0.01566997356712818 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06304118037223816, "epoch": 9.72, "learning_rate": 3.365555049582582e-05, "loss": 0.0571, "step": 10232, "task_loss": 0.003476545214653015 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05096285790205002, "epoch": 9.72, "learning_rate": 3.364555346340518e-05, "loss": 0.0523, "step": 10233, "task_loss": 0.06410223990678787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.061933599412441254, "epoch": 9.72, "learning_rate": 3.3635554860478126e-05, "loss": 0.0585, "step": 10234, "task_loss": 0.02772807516157627 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.16535219550132751, "epoch": 9.72, "learning_rate": 3.362555468886093e-05, "loss": 0.1643, "step": 10235, "task_loss": 0.1545773446559906 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039442308247089386, "epoch": 9.72, "learning_rate": 3.361555295037019e-05, "loss": 0.047, "step": 10236, "task_loss": 0.11461587995290756 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019081950187683105, "epoch": 9.72, "learning_rate": 3.360554964682276e-05, "loss": 0.0304, "step": 10237, "task_loss": 0.13198192417621613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02817378006875515, "epoch": 9.72, "learning_rate": 3.359554478003579e-05, "loss": 0.038, "step": 10238, "task_loss": 0.1261017769575119 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024465490132570267, "epoch": 9.72, "learning_rate": 3.358553835182673e-05, "loss": 0.0231, "step": 10239, "task_loss": 0.01032637245953083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026946038007736206, "epoch": 9.72, "learning_rate": 3.357553036401326e-05, "loss": 0.0259, "step": 10240, "task_loss": 0.016648059710860252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10884161293506622, "epoch": 9.73, "learning_rate": 3.356552081841341e-05, "loss": 0.1042, "step": 10241, "task_loss": 0.062114790081977844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044160258024930954, "epoch": 9.73, "learning_rate": 3.355550971684545e-05, "loss": 0.0446, "step": 10242, "task_loss": 0.048725277185440063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016672126948833466, "epoch": 9.73, "learning_rate": 3.3545497061127946e-05, "loss": 0.0278, "step": 10243, "task_loss": 0.12836672365665436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025100041180849075, "epoch": 9.73, "learning_rate": 3.353548285307975e-05, "loss": 0.0276, "step": 10244, "task_loss": 0.050310954451560974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07603813707828522, "epoch": 9.73, "learning_rate": 3.352546709451998e-05, "loss": 0.0717, "step": 10245, "task_loss": 0.03274556249380112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06987538933753967, "epoch": 9.73, "learning_rate": 3.351544978726805e-05, "loss": 0.0762, "step": 10246, "task_loss": 0.1328875869512558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01046474277973175, "epoch": 9.73, "learning_rate": 3.350543093314366e-05, "loss": 0.0115, "step": 10247, "task_loss": 0.020672082901000977 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018444234505295753, "epoch": 9.73, "learning_rate": 3.349541053396678e-05, "loss": 0.0198, "step": 10248, "task_loss": 0.03211362287402153 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13752481341362, "epoch": 9.73, "learning_rate": 3.348538859155766e-05, "loss": 0.1363, "step": 10249, "task_loss": 0.12482313811779022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024666164070367813, "epoch": 9.73, "learning_rate": 3.347536510773684e-05, "loss": 0.031, "step": 10250, "task_loss": 0.08787593245506287 }, { "epoch": 9.73, "eval_accuracy": 0.8910550458715596, "eval_loss": 0.4798487424850464, "eval_runtime": 18.1145, "eval_samples_per_second": 48.138, "eval_steps_per_second": 6.017, "step": 10250 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026560328900814056, "epoch": 9.74, "learning_rate": 3.346534008432513e-05, "loss": 0.0258, "step": 10251, "task_loss": 0.018506888300180435 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017583798617124557, "epoch": 9.74, "learning_rate": 3.3455313523143615e-05, "loss": 0.0163, "step": 10252, "task_loss": 0.004295343533158302 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06413343548774719, "epoch": 9.74, "learning_rate": 3.3445285426013685e-05, "loss": 0.0662, "step": 10253, "task_loss": 0.08491207659244537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07041002064943314, "epoch": 9.74, "learning_rate": 3.343525579475698e-05, "loss": 0.0762, "step": 10254, "task_loss": 0.12783107161521912 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021717432886362076, "epoch": 9.74, "learning_rate": 3.342522463119543e-05, "loss": 0.0355, "step": 10255, "task_loss": 0.15983924269676208 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016216669231653214, "epoch": 9.74, "learning_rate": 3.341519193715127e-05, "loss": 0.0152, "step": 10256, "task_loss": 0.00632026232779026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03318667411804199, "epoch": 9.74, "learning_rate": 3.340515771444695e-05, "loss": 0.0442, "step": 10257, "task_loss": 0.14321547746658325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0912448838353157, "epoch": 9.74, "learning_rate": 3.3395121964905265e-05, "loss": 0.0919, "step": 10258, "task_loss": 0.09797343611717224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09608903527259827, "epoch": 9.74, "learning_rate": 3.338508469034922e-05, "loss": 0.1046, "step": 10259, "task_loss": 0.18118667602539062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04384881630539894, "epoch": 9.74, "learning_rate": 3.3375045892602176e-05, "loss": 0.0422, "step": 10260, "task_loss": 0.02693062275648117 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023890579119324684, "epoch": 9.74, "learning_rate": 3.3365005573487706e-05, "loss": 0.0328, "step": 10261, "task_loss": 0.11346684396266937 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027102213352918625, "epoch": 9.75, "learning_rate": 3.335496373482969e-05, "loss": 0.0256, "step": 10262, "task_loss": 0.012507695704698563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07778482139110565, "epoch": 9.75, "learning_rate": 3.334492037845227e-05, "loss": 0.0737, "step": 10263, "task_loss": 0.037038303911685944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0290234312415123, "epoch": 9.75, "learning_rate": 3.333487550617987e-05, "loss": 0.0362, "step": 10264, "task_loss": 0.10083475708961487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013366115279495716, "epoch": 9.75, "learning_rate": 3.332482911983721e-05, "loss": 0.0137, "step": 10265, "task_loss": 0.01663101837038994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08686914294958115, "epoch": 9.75, "learning_rate": 3.331478122124924e-05, "loss": 0.0917, "step": 10266, "task_loss": 0.13518640398979187 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01988716423511505, "epoch": 9.75, "learning_rate": 3.330473181224121e-05, "loss": 0.0226, "step": 10267, "task_loss": 0.04693231359124184 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03620753064751625, "epoch": 9.75, "learning_rate": 3.3294680894638655e-05, "loss": 0.0332, "step": 10268, "task_loss": 0.0058042556047439575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08979254215955734, "epoch": 9.75, "learning_rate": 3.328462847026736e-05, "loss": 0.0962, "step": 10269, "task_loss": 0.15426769852638245 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030872471630573273, "epoch": 9.75, "learning_rate": 3.327457454095342e-05, "loss": 0.0325, "step": 10270, "task_loss": 0.04736156761646271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010424168780446053, "epoch": 9.75, "learning_rate": 3.3264519108523154e-05, "loss": 0.0103, "step": 10271, "task_loss": 0.009468691423535347 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03220530226826668, "epoch": 9.75, "learning_rate": 3.3254462174803186e-05, "loss": 0.0311, "step": 10272, "task_loss": 0.021126460283994675 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03830189257860184, "epoch": 9.76, "learning_rate": 3.324440374162041e-05, "loss": 0.0357, "step": 10273, "task_loss": 0.012562312185764313 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03549380600452423, "epoch": 9.76, "learning_rate": 3.323434381080199e-05, "loss": 0.0339, "step": 10274, "task_loss": 0.019155368208885193 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0406593419611454, "epoch": 9.76, "learning_rate": 3.322428238417537e-05, "loss": 0.0548, "step": 10275, "task_loss": 0.18189404904842377 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08273656666278839, "epoch": 9.76, "learning_rate": 3.321421946356823e-05, "loss": 0.0821, "step": 10276, "task_loss": 0.07649406045675278 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047196559607982635, "epoch": 9.76, "learning_rate": 3.320415505080858e-05, "loss": 0.0473, "step": 10277, "task_loss": 0.048162683844566345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0971732959151268, "epoch": 9.76, "learning_rate": 3.3194089147724644e-05, "loss": 0.1024, "step": 10278, "task_loss": 0.1492057889699936 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008488696068525314, "epoch": 9.76, "learning_rate": 3.3184021756144954e-05, "loss": 0.0085, "step": 10279, "task_loss": 0.008146345615386963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03513052687048912, "epoch": 9.76, "learning_rate": 3.317395287789829e-05, "loss": 0.0421, "step": 10280, "task_loss": 0.1049308255314827 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.060586679726839066, "epoch": 9.76, "learning_rate": 3.316388251481373e-05, "loss": 0.0639, "step": 10281, "task_loss": 0.09365570545196533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02429119497537613, "epoch": 9.76, "learning_rate": 3.3153810668720594e-05, "loss": 0.0301, "step": 10282, "task_loss": 0.0827622190117836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02623087167739868, "epoch": 9.77, "learning_rate": 3.3143737341448475e-05, "loss": 0.0325, "step": 10283, "task_loss": 0.0885535329580307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09432581067085266, "epoch": 9.77, "learning_rate": 3.3133662534827255e-05, "loss": 0.0897, "step": 10284, "task_loss": 0.04778246581554413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.040426380932331085, "epoch": 9.77, "learning_rate": 3.3123586250687055e-05, "loss": 0.0396, "step": 10285, "task_loss": 0.031931400299072266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047953128814697266, "epoch": 9.77, "learning_rate": 3.311350849085829e-05, "loss": 0.0458, "step": 10286, "task_loss": 0.026047490537166595 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041989296674728394, "epoch": 9.77, "learning_rate": 3.3103429257171635e-05, "loss": 0.0387, "step": 10287, "task_loss": 0.009208640083670616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023370787501335144, "epoch": 9.77, "learning_rate": 3.309334855145803e-05, "loss": 0.031, "step": 10288, "task_loss": 0.09938006848096848 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07616433501243591, "epoch": 9.77, "learning_rate": 3.3083266375548675e-05, "loss": 0.0742, "step": 10289, "task_loss": 0.05699377879500389 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13219673931598663, "epoch": 9.77, "learning_rate": 3.3073182731275064e-05, "loss": 0.1273, "step": 10290, "task_loss": 0.08289719372987747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.043332893401384354, "epoch": 9.77, "learning_rate": 3.306309762046892e-05, "loss": 0.0515, "step": 10291, "task_loss": 0.1250426173210144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10248932242393494, "epoch": 9.77, "learning_rate": 3.305301104496227e-05, "loss": 0.0988, "step": 10292, "task_loss": 0.06527666747570038 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01327238418161869, "epoch": 9.77, "learning_rate": 3.3042923006587366e-05, "loss": 0.0264, "step": 10293, "task_loss": 0.145028218626976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015359059907495975, "epoch": 9.78, "learning_rate": 3.303283350717678e-05, "loss": 0.0216, "step": 10294, "task_loss": 0.07747124135494232 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019833415746688843, "epoch": 9.78, "learning_rate": 3.302274254856329e-05, "loss": 0.0288, "step": 10295, "task_loss": 0.10919887572526932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015060881152749062, "epoch": 9.78, "learning_rate": 3.301265013257998e-05, "loss": 0.0139, "step": 10296, "task_loss": 0.0035025514662265778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13087275624275208, "epoch": 9.78, "learning_rate": 3.300255626106019e-05, "loss": 0.1465, "step": 10297, "task_loss": 0.2874350845813751 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02529519982635975, "epoch": 9.78, "learning_rate": 3.2992460935837505e-05, "loss": 0.0235, "step": 10298, "task_loss": 0.0074701253324747086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011823216453194618, "epoch": 9.78, "learning_rate": 3.2982364158745805e-05, "loss": 0.011, "step": 10299, "task_loss": 0.003194596618413925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05281532555818558, "epoch": 9.78, "learning_rate": 3.297226593161921e-05, "loss": 0.0509, "step": 10300, "task_loss": 0.033622805029153824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013377421535551548, "epoch": 9.78, "learning_rate": 3.2962166256292113e-05, "loss": 0.0233, "step": 10301, "task_loss": 0.11291710287332535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011167014017701149, "epoch": 9.78, "learning_rate": 3.295206513459917e-05, "loss": 0.0191, "step": 10302, "task_loss": 0.09001649916172028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019459430128335953, "epoch": 9.78, "learning_rate": 3.2941962568375296e-05, "loss": 0.018, "step": 10303, "task_loss": 0.005041791126132011 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014631148427724838, "epoch": 9.79, "learning_rate": 3.2931858559455674e-05, "loss": 0.0183, "step": 10304, "task_loss": 0.05133789777755737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08178240060806274, "epoch": 9.79, "learning_rate": 3.292175310967575e-05, "loss": 0.085, "step": 10305, "task_loss": 0.11350104212760925 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029704544693231583, "epoch": 9.79, "learning_rate": 3.291164622087122e-05, "loss": 0.039, "step": 10306, "task_loss": 0.12229295074939728 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007734452374279499, "epoch": 9.79, "learning_rate": 3.290153789487804e-05, "loss": 0.0074, "step": 10307, "task_loss": 0.004266131669282913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01227061077952385, "epoch": 9.79, "learning_rate": 3.289142813353246e-05, "loss": 0.0113, "step": 10308, "task_loss": 0.0023228712379932404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016713295131921768, "epoch": 9.79, "learning_rate": 3.2881316938670945e-05, "loss": 0.0228, "step": 10309, "task_loss": 0.07756958901882172 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031723689287900925, "epoch": 9.79, "learning_rate": 3.2871204312130254e-05, "loss": 0.032, "step": 10310, "task_loss": 0.03421415388584137 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015944186598062515, "epoch": 9.79, "learning_rate": 3.28610902557474e-05, "loss": 0.0221, "step": 10311, "task_loss": 0.07747285813093185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04275937378406525, "epoch": 9.79, "learning_rate": 3.285097477135963e-05, "loss": 0.0503, "step": 10312, "task_loss": 0.11856701970100403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023751258850097656, "epoch": 9.79, "learning_rate": 3.284085786080449e-05, "loss": 0.023, "step": 10313, "task_loss": 0.01643642783164978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019033825024962425, "epoch": 9.79, "learning_rate": 3.283073952591975e-05, "loss": 0.0185, "step": 10314, "task_loss": 0.013599434867501259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.13899603486061096, "epoch": 9.8, "learning_rate": 3.2820619768543473e-05, "loss": 0.1415, "step": 10315, "task_loss": 0.1637457311153412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09794485569000244, "epoch": 9.8, "learning_rate": 3.281049859051394e-05, "loss": 0.0938, "step": 10316, "task_loss": 0.0560276135802269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018764419481158257, "epoch": 9.8, "learning_rate": 3.280037599366972e-05, "loss": 0.0268, "step": 10317, "task_loss": 0.09955213218927383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11311816424131393, "epoch": 9.8, "learning_rate": 3.2790251979849654e-05, "loss": 0.1127, "step": 10318, "task_loss": 0.10870064795017242 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10437528789043427, "epoch": 9.8, "learning_rate": 3.278012655089277e-05, "loss": 0.126, "step": 10319, "task_loss": 0.3202948570251465 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009548291563987732, "epoch": 9.8, "learning_rate": 3.276999970863845e-05, "loss": 0.0129, "step": 10320, "task_loss": 0.04352171719074249 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01687040366232395, "epoch": 9.8, "learning_rate": 3.275987145492625e-05, "loss": 0.0159, "step": 10321, "task_loss": 0.007143234834074974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047225743532180786, "epoch": 9.8, "learning_rate": 3.274974179159603e-05, "loss": 0.0458, "step": 10322, "task_loss": 0.0325019434094429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03787485137581825, "epoch": 9.8, "learning_rate": 3.27396107204879e-05, "loss": 0.0505, "step": 10323, "task_loss": 0.16443775594234467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024566274136304855, "epoch": 9.8, "learning_rate": 3.2729478243442194e-05, "loss": 0.0231, "step": 10324, "task_loss": 0.009893251582980156 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018858332186937332, "epoch": 9.81, "learning_rate": 3.271934436229955e-05, "loss": 0.0259, "step": 10325, "task_loss": 0.0887862890958786 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012717785313725471, "epoch": 9.81, "learning_rate": 3.270920907890082e-05, "loss": 0.0124, "step": 10326, "task_loss": 0.00914292223751545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008497057482600212, "epoch": 9.81, "learning_rate": 3.269907239508714e-05, "loss": 0.0103, "step": 10327, "task_loss": 0.026958035305142403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013917407020926476, "epoch": 9.81, "learning_rate": 3.268893431269987e-05, "loss": 0.0173, "step": 10328, "task_loss": 0.047798462212085724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022593341767787933, "epoch": 9.81, "learning_rate": 3.2678794833580654e-05, "loss": 0.0267, "step": 10329, "task_loss": 0.0638725757598877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020902365446090698, "epoch": 9.81, "learning_rate": 3.2668653959571384e-05, "loss": 0.0272, "step": 10330, "task_loss": 0.08406180143356323 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021388908848166466, "epoch": 9.81, "learning_rate": 3.2658511692514184e-05, "loss": 0.0294, "step": 10331, "task_loss": 0.10142026096582413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03885917365550995, "epoch": 9.81, "learning_rate": 3.2648368034251454e-05, "loss": 0.0584, "step": 10332, "task_loss": 0.23429960012435913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06736599653959274, "epoch": 9.81, "learning_rate": 3.263822298662583e-05, "loss": 0.0732, "step": 10333, "task_loss": 0.12564432621002197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01693696156144142, "epoch": 9.81, "learning_rate": 3.2628076551480216e-05, "loss": 0.0215, "step": 10334, "task_loss": 0.06282781064510345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03203189745545387, "epoch": 9.81, "learning_rate": 3.2617928730657764e-05, "loss": 0.0306, "step": 10335, "task_loss": 0.018122676759958267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01440800167620182, "epoch": 9.82, "learning_rate": 3.260777952600186e-05, "loss": 0.0135, "step": 10336, "task_loss": 0.004851058125495911 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04675164446234703, "epoch": 9.82, "learning_rate": 3.2597628939356175e-05, "loss": 0.0745, "step": 10337, "task_loss": 0.3238287568092346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012276804074645042, "epoch": 9.82, "learning_rate": 3.25874769725646e-05, "loss": 0.0225, "step": 10338, "task_loss": 0.114773690700531 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013100717216730118, "epoch": 9.82, "learning_rate": 3.257732362747129e-05, "loss": 0.0128, "step": 10339, "task_loss": 0.009668847545981407 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07656170427799225, "epoch": 9.82, "learning_rate": 3.256716890592065e-05, "loss": 0.0814, "step": 10340, "task_loss": 0.1245703175663948 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09409487247467041, "epoch": 9.82, "learning_rate": 3.255701280975733e-05, "loss": 0.0905, "step": 10341, "task_loss": 0.05802832543849945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00820472463965416, "epoch": 9.82, "learning_rate": 3.2546855340826246e-05, "loss": 0.0078, "step": 10342, "task_loss": 0.004141604527831078 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0564790740609169, "epoch": 9.82, "learning_rate": 3.253669650097254e-05, "loss": 0.0648, "step": 10343, "task_loss": 0.13937750458717346 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.033709198236465454, "epoch": 9.82, "learning_rate": 3.2526536292041625e-05, "loss": 0.0378, "step": 10344, "task_loss": 0.07450772076845169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.061526041477918625, "epoch": 9.82, "learning_rate": 3.2516374715879126e-05, "loss": 0.0774, "step": 10345, "task_loss": 0.22075936198234558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029069392010569572, "epoch": 9.83, "learning_rate": 3.250621177433097e-05, "loss": 0.0391, "step": 10346, "task_loss": 0.12960076332092285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03016573004424572, "epoch": 9.83, "learning_rate": 3.249604746924331e-05, "loss": 0.0284, "step": 10347, "task_loss": 0.012552225962281227 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1340094357728958, "epoch": 9.83, "learning_rate": 3.248588180246251e-05, "loss": 0.1435, "step": 10348, "task_loss": 0.22935834527015686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.046159401535987854, "epoch": 9.83, "learning_rate": 3.247571477583523e-05, "loss": 0.0571, "step": 10349, "task_loss": 0.1553247720003128 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01781924068927765, "epoch": 9.83, "learning_rate": 3.2465546391208355e-05, "loss": 0.0167, "step": 10350, "task_loss": 0.007019467651844025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01618257164955139, "epoch": 9.83, "learning_rate": 3.245537665042903e-05, "loss": 0.0214, "step": 10351, "task_loss": 0.0686056837439537 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03626525402069092, "epoch": 9.83, "learning_rate": 3.244520555534463e-05, "loss": 0.034, "step": 10352, "task_loss": 0.014076223596930504 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10587593913078308, "epoch": 9.83, "learning_rate": 3.243503310780278e-05, "loss": 0.102, "step": 10353, "task_loss": 0.06675413995981216 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014715575613081455, "epoch": 9.83, "learning_rate": 3.242485930965136e-05, "loss": 0.0137, "step": 10354, "task_loss": 0.004238050431013107 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0231354720890522, "epoch": 9.83, "learning_rate": 3.241468416273849e-05, "loss": 0.0226, "step": 10355, "task_loss": 0.01763581857085228 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09283407032489777, "epoch": 9.83, "learning_rate": 3.2404507668912534e-05, "loss": 0.0874, "step": 10356, "task_loss": 0.03887883201241493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04399856925010681, "epoch": 9.84, "learning_rate": 3.2394329830022095e-05, "loss": 0.0429, "step": 10357, "task_loss": 0.0327279269695282 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.042265597730875015, "epoch": 9.84, "learning_rate": 3.238415064791603e-05, "loss": 0.0397, "step": 10358, "task_loss": 0.016846617683768272 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024321310222148895, "epoch": 9.84, "learning_rate": 3.237397012444344e-05, "loss": 0.0254, "step": 10359, "task_loss": 0.03516604006290436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03976666182279587, "epoch": 9.84, "learning_rate": 3.2363788261453664e-05, "loss": 0.0445, "step": 10360, "task_loss": 0.08759070187807083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07277016341686249, "epoch": 9.84, "learning_rate": 3.2353605060796286e-05, "loss": 0.067, "step": 10361, "task_loss": 0.015487806871533394 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013543391600251198, "epoch": 9.84, "learning_rate": 3.2343420524321134e-05, "loss": 0.0139, "step": 10362, "task_loss": 0.01754959300160408 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06739618629217148, "epoch": 9.84, "learning_rate": 3.2333234653878275e-05, "loss": 0.065, "step": 10363, "task_loss": 0.04387857764959335 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06469708681106567, "epoch": 9.84, "learning_rate": 3.2323047451318023e-05, "loss": 0.0679, "step": 10364, "task_loss": 0.09665581583976746 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025472253561019897, "epoch": 9.84, "learning_rate": 3.2312858918490936e-05, "loss": 0.0235, "step": 10365, "task_loss": 0.005352867767214775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08233923465013504, "epoch": 9.84, "learning_rate": 3.2302669057247806e-05, "loss": 0.0887, "step": 10366, "task_loss": 0.14634165167808533 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09745900332927704, "epoch": 9.85, "learning_rate": 3.2292477869439666e-05, "loss": 0.1002, "step": 10367, "task_loss": 0.1244800016283989 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0947013646364212, "epoch": 9.85, "learning_rate": 3.228228535691781e-05, "loss": 0.0893, "step": 10368, "task_loss": 0.04106832295656204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019390946254134178, "epoch": 9.85, "learning_rate": 3.2272091521533745e-05, "loss": 0.0182, "step": 10369, "task_loss": 0.007950548082590103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020949633792042732, "epoch": 9.85, "learning_rate": 3.226189636513923e-05, "loss": 0.0202, "step": 10370, "task_loss": 0.013539431616663933 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015328247100114822, "epoch": 9.85, "learning_rate": 3.225169988958627e-05, "loss": 0.0147, "step": 10371, "task_loss": 0.009431937709450722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029308216646313667, "epoch": 9.85, "learning_rate": 3.2241502096727095e-05, "loss": 0.0282, "step": 10372, "task_loss": 0.018634788691997528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.056150779128074646, "epoch": 9.85, "learning_rate": 3.2231302988414194e-05, "loss": 0.0586, "step": 10373, "task_loss": 0.08058308064937592 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03458091989159584, "epoch": 9.85, "learning_rate": 3.222110256650028e-05, "loss": 0.0484, "step": 10374, "task_loss": 0.17240062355995178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06620018929243088, "epoch": 9.85, "learning_rate": 3.2210900832838295e-05, "loss": 0.0788, "step": 10375, "task_loss": 0.1921333372592926 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04633745551109314, "epoch": 9.85, "learning_rate": 3.220069778928146e-05, "loss": 0.0467, "step": 10376, "task_loss": 0.04954282194375992 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027524154633283615, "epoch": 9.85, "learning_rate": 3.2190493437683185e-05, "loss": 0.0322, "step": 10377, "task_loss": 0.07475399971008301 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.062353409826755524, "epoch": 9.86, "learning_rate": 3.2180287779897155e-05, "loss": 0.0616, "step": 10378, "task_loss": 0.05466865003108978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0365450382232666, "epoch": 9.86, "learning_rate": 3.217008081777726e-05, "loss": 0.0391, "step": 10379, "task_loss": 0.06168461591005325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07208600640296936, "epoch": 9.86, "learning_rate": 3.2159872553177655e-05, "loss": 0.0865, "step": 10380, "task_loss": 0.21665066480636597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013081874698400497, "epoch": 9.86, "learning_rate": 3.2149662987952725e-05, "loss": 0.0183, "step": 10381, "task_loss": 0.0652560293674469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11617452651262283, "epoch": 9.86, "learning_rate": 3.213945212395707e-05, "loss": 0.112, "step": 10382, "task_loss": 0.07452362775802612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02089625969529152, "epoch": 9.86, "learning_rate": 3.212923996304556e-05, "loss": 0.0311, "step": 10383, "task_loss": 0.12253687530755997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0406123585999012, "epoch": 9.86, "learning_rate": 3.211902650707327e-05, "loss": 0.0457, "step": 10384, "task_loss": 0.09141912311315536 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08201208710670471, "epoch": 9.86, "learning_rate": 3.210881175789553e-05, "loss": 0.092, "step": 10385, "task_loss": 0.18144634366035461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02090279385447502, "epoch": 9.86, "learning_rate": 3.209859571736791e-05, "loss": 0.0203, "step": 10386, "task_loss": 0.014566395431756973 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05922047048807144, "epoch": 9.86, "learning_rate": 3.208837838734618e-05, "loss": 0.061, "step": 10387, "task_loss": 0.07677384465932846 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05778921768069267, "epoch": 9.87, "learning_rate": 3.207815976968638e-05, "loss": 0.0572, "step": 10388, "task_loss": 0.052189189940690994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03705509752035141, "epoch": 9.87, "learning_rate": 3.2067939866244764e-05, "loss": 0.0341, "step": 10389, "task_loss": 0.00781635195016861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022746965289115906, "epoch": 9.87, "learning_rate": 3.205771867887784e-05, "loss": 0.0218, "step": 10390, "task_loss": 0.013284927234053612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04225531220436096, "epoch": 9.87, "learning_rate": 3.204749620944232e-05, "loss": 0.0421, "step": 10391, "task_loss": 0.041095249354839325 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08849678933620453, "epoch": 9.87, "learning_rate": 3.203727245979518e-05, "loss": 0.0892, "step": 10392, "task_loss": 0.09528757631778717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1014036163687706, "epoch": 9.87, "learning_rate": 3.20270474317936e-05, "loss": 0.1055, "step": 10393, "task_loss": 0.14196741580963135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04590814188122749, "epoch": 9.87, "learning_rate": 3.201682112729502e-05, "loss": 0.0474, "step": 10394, "task_loss": 0.06046411767601967 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03985198959708214, "epoch": 9.87, "learning_rate": 3.2006593548157074e-05, "loss": 0.0387, "step": 10395, "task_loss": 0.028516611084342003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06808362156152725, "epoch": 9.87, "learning_rate": 3.1996364696237676e-05, "loss": 0.0643, "step": 10396, "task_loss": 0.030522214248776436 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03122507780790329, "epoch": 9.87, "learning_rate": 3.198613457339493e-05, "loss": 0.0428, "step": 10397, "task_loss": 0.14679580926895142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02945810928940773, "epoch": 9.87, "learning_rate": 3.19759031814872e-05, "loss": 0.0363, "step": 10398, "task_loss": 0.09743687510490417 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02599484845995903, "epoch": 9.88, "learning_rate": 3.196567052237306e-05, "loss": 0.0254, "step": 10399, "task_loss": 0.020352628082036972 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.22350770235061646, "epoch": 9.88, "learning_rate": 3.195543659791132e-05, "loss": 0.2232, "step": 10400, "task_loss": 0.2200383096933365 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041732676327228546, "epoch": 9.88, "learning_rate": 3.194520140996102e-05, "loss": 0.0469, "step": 10401, "task_loss": 0.09368692338466644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041806213557720184, "epoch": 9.88, "learning_rate": 3.193496496038144e-05, "loss": 0.0622, "step": 10402, "task_loss": 0.24602389335632324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0394703671336174, "epoch": 9.88, "learning_rate": 3.1924727251032075e-05, "loss": 0.0432, "step": 10403, "task_loss": 0.07703036814928055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034872762858867645, "epoch": 9.88, "learning_rate": 3.191448828377267e-05, "loss": 0.0486, "step": 10404, "task_loss": 0.17233772575855255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10517394542694092, "epoch": 9.88, "learning_rate": 3.1904248060463146e-05, "loss": 0.1068, "step": 10405, "task_loss": 0.12141425162553787 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010680675506591797, "epoch": 9.88, "learning_rate": 3.189400658296372e-05, "loss": 0.0104, "step": 10406, "task_loss": 0.007806859910488129 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09319774061441422, "epoch": 9.88, "learning_rate": 3.188376385313479e-05, "loss": 0.0928, "step": 10407, "task_loss": 0.08945365995168686 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05169279873371124, "epoch": 9.88, "learning_rate": 3.187351987283701e-05, "loss": 0.0558, "step": 10408, "task_loss": 0.09317981451749802 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02045218087732792, "epoch": 9.89, "learning_rate": 3.1863274643931244e-05, "loss": 0.0263, "step": 10409, "task_loss": 0.0785207599401474 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07327836751937866, "epoch": 9.89, "learning_rate": 3.185302816827858e-05, "loss": 0.0837, "step": 10410, "task_loss": 0.17741957306861877 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012392308562994003, "epoch": 9.89, "learning_rate": 3.184278044774035e-05, "loss": 0.0115, "step": 10411, "task_loss": 0.003290366381406784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.052759043872356415, "epoch": 9.89, "learning_rate": 3.183253148417808e-05, "loss": 0.0562, "step": 10412, "task_loss": 0.08686379343271255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02482384815812111, "epoch": 9.89, "learning_rate": 3.182228127945358e-05, "loss": 0.0357, "step": 10413, "task_loss": 0.13362696766853333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03491012752056122, "epoch": 9.89, "learning_rate": 3.1812029835428825e-05, "loss": 0.0356, "step": 10414, "task_loss": 0.04205203801393509 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06706416606903076, "epoch": 9.89, "learning_rate": 3.1801777153966034e-05, "loss": 0.071, "step": 10415, "task_loss": 0.1061612218618393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0414917916059494, "epoch": 9.89, "learning_rate": 3.179152323692767e-05, "loss": 0.0383, "step": 10416, "task_loss": 0.009932199493050575 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022580554708838463, "epoch": 9.89, "learning_rate": 3.1781268086176406e-05, "loss": 0.0401, "step": 10417, "task_loss": 0.1979600191116333 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07729755342006683, "epoch": 9.89, "learning_rate": 3.177101170357513e-05, "loss": 0.0813, "step": 10418, "task_loss": 0.11744911223649979 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017039090394973755, "epoch": 9.89, "learning_rate": 3.1760754090986975e-05, "loss": 0.0158, "step": 10419, "task_loss": 0.004911573603749275 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034923627972602844, "epoch": 9.9, "learning_rate": 3.175049525027527e-05, "loss": 0.0393, "step": 10420, "task_loss": 0.07879183441400528 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015226260758936405, "epoch": 9.9, "learning_rate": 3.17402351833036e-05, "loss": 0.0209, "step": 10421, "task_loss": 0.07186593115329742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04073633253574371, "epoch": 9.9, "learning_rate": 3.1729973891935745e-05, "loss": 0.0493, "step": 10422, "task_loss": 0.12683020532131195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059145376086235046, "epoch": 9.9, "learning_rate": 3.1719711378035714e-05, "loss": 0.0822, "step": 10423, "task_loss": 0.2894851565361023 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04453141614794731, "epoch": 9.9, "learning_rate": 3.1709447643467755e-05, "loss": 0.0422, "step": 10424, "task_loss": 0.021708250045776367 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027283282950520515, "epoch": 9.9, "learning_rate": 3.1699182690096316e-05, "loss": 0.036, "step": 10425, "task_loss": 0.11464565247297287 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.24350899457931519, "epoch": 9.9, "learning_rate": 3.168891651978609e-05, "loss": 0.2368, "step": 10426, "task_loss": 0.17603522539138794 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07290820777416229, "epoch": 9.9, "learning_rate": 3.167864913440195e-05, "loss": 0.0691, "step": 10427, "task_loss": 0.034495919942855835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05629532411694527, "epoch": 9.9, "learning_rate": 3.1668380535809036e-05, "loss": 0.0606, "step": 10428, "task_loss": 0.09951600432395935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07032791525125504, "epoch": 9.9, "learning_rate": 3.165811072587267e-05, "loss": 0.0833, "step": 10429, "task_loss": 0.20026040077209473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02635234221816063, "epoch": 9.91, "learning_rate": 3.164783970645845e-05, "loss": 0.0334, "step": 10430, "task_loss": 0.09689018875360489 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014292672276496887, "epoch": 9.91, "learning_rate": 3.1637567479432113e-05, "loss": 0.0284, "step": 10431, "task_loss": 0.15557925403118134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038801245391368866, "epoch": 9.91, "learning_rate": 3.1627294046659675e-05, "loss": 0.042, "step": 10432, "task_loss": 0.07107855379581451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02201734110713005, "epoch": 9.91, "learning_rate": 3.1617019410007366e-05, "loss": 0.0274, "step": 10433, "task_loss": 0.07534562796354294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01769842579960823, "epoch": 9.91, "learning_rate": 3.16067435713416e-05, "loss": 0.0172, "step": 10434, "task_loss": 0.012379692867398262 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026350311934947968, "epoch": 9.91, "learning_rate": 3.159646653252906e-05, "loss": 0.0298, "step": 10435, "task_loss": 0.06069865822792053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01953766867518425, "epoch": 9.91, "learning_rate": 3.1586188295436594e-05, "loss": 0.0303, "step": 10436, "task_loss": 0.12754112482070923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03570760414004326, "epoch": 9.91, "learning_rate": 3.1575908861931314e-05, "loss": 0.0438, "step": 10437, "task_loss": 0.11667999625205994 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018649637699127197, "epoch": 9.91, "learning_rate": 3.156562823388051e-05, "loss": 0.0237, "step": 10438, "task_loss": 0.06956367939710617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029894903302192688, "epoch": 9.91, "learning_rate": 3.155534641315172e-05, "loss": 0.0362, "step": 10439, "task_loss": 0.09323906153440475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020881684496998787, "epoch": 9.91, "learning_rate": 3.154506340161269e-05, "loss": 0.0296, "step": 10440, "task_loss": 0.10763251781463623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06025876849889755, "epoch": 9.92, "learning_rate": 3.1534779201131366e-05, "loss": 0.0771, "step": 10441, "task_loss": 0.22887033224105835 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059150584042072296, "epoch": 9.92, "learning_rate": 3.152449381357593e-05, "loss": 0.0634, "step": 10442, "task_loss": 0.1019982397556305 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08557627350091934, "epoch": 9.92, "learning_rate": 3.151420724081478e-05, "loss": 0.0847, "step": 10443, "task_loss": 0.07654394954442978 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05869875103235245, "epoch": 9.92, "learning_rate": 3.1503919484716495e-05, "loss": 0.0728, "step": 10444, "task_loss": 0.19989144802093506 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01628267765045166, "epoch": 9.92, "learning_rate": 3.149363054714992e-05, "loss": 0.0268, "step": 10445, "task_loss": 0.12150625884532928 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0214032344520092, "epoch": 9.92, "learning_rate": 3.148334042998408e-05, "loss": 0.0206, "step": 10446, "task_loss": 0.013740543276071548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0316503532230854, "epoch": 9.92, "learning_rate": 3.1473049135088225e-05, "loss": 0.0297, "step": 10447, "task_loss": 0.012053485959768295 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012814265675842762, "epoch": 9.92, "learning_rate": 3.146275666433183e-05, "loss": 0.0122, "step": 10448, "task_loss": 0.007167477160692215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05052739381790161, "epoch": 9.92, "learning_rate": 3.145246301958455e-05, "loss": 0.0509, "step": 10449, "task_loss": 0.054102275520563126 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.044567592442035675, "epoch": 9.92, "learning_rate": 3.14421682027163e-05, "loss": 0.0422, "step": 10450, "task_loss": 0.021235253661870956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019051125273108482, "epoch": 9.92, "learning_rate": 3.143187221559715e-05, "loss": 0.0181, "step": 10451, "task_loss": 0.009563138708472252 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0175587460398674, "epoch": 9.93, "learning_rate": 3.142157506009746e-05, "loss": 0.0162, "step": 10452, "task_loss": 0.003730185329914093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025068845599889755, "epoch": 9.93, "learning_rate": 3.141127673808772e-05, "loss": 0.0249, "step": 10453, "task_loss": 0.023736948147416115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012271126732230186, "epoch": 9.93, "learning_rate": 3.140097725143868e-05, "loss": 0.0116, "step": 10454, "task_loss": 0.005191892385482788 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0380866639316082, "epoch": 9.93, "learning_rate": 3.13906766020213e-05, "loss": 0.0387, "step": 10455, "task_loss": 0.04454638063907623 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0549575500190258, "epoch": 9.93, "learning_rate": 3.138037479170674e-05, "loss": 0.0522, "step": 10456, "task_loss": 0.02707308530807495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06924262642860413, "epoch": 9.93, "learning_rate": 3.137007182236637e-05, "loss": 0.0731, "step": 10457, "task_loss": 0.10814409703016281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02348874695599079, "epoch": 9.93, "learning_rate": 3.1359767695871767e-05, "loss": 0.0272, "step": 10458, "task_loss": 0.06097613647580147 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04042445123195648, "epoch": 9.93, "learning_rate": 3.134946241409474e-05, "loss": 0.0492, "step": 10459, "task_loss": 0.127986341714859 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018123149871826172, "epoch": 9.93, "learning_rate": 3.133915597890729e-05, "loss": 0.02, "step": 10460, "task_loss": 0.036422938108444214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018057895824313164, "epoch": 9.93, "learning_rate": 3.132884839218162e-05, "loss": 0.0172, "step": 10461, "task_loss": 0.009080575779080391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02193017117679119, "epoch": 9.94, "learning_rate": 3.131853965579016e-05, "loss": 0.0215, "step": 10462, "task_loss": 0.017662420868873596 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041326068341732025, "epoch": 9.94, "learning_rate": 3.130822977160554e-05, "loss": 0.0484, "step": 10463, "task_loss": 0.11219672113656998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015017848461866379, "epoch": 9.94, "learning_rate": 3.129791874150062e-05, "loss": 0.0244, "step": 10464, "task_loss": 0.1089889407157898 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01874970830976963, "epoch": 9.94, "learning_rate": 3.1287606567348406e-05, "loss": 0.0256, "step": 10465, "task_loss": 0.08684463053941727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020482422783970833, "epoch": 9.94, "learning_rate": 3.1277293251022185e-05, "loss": 0.0274, "step": 10466, "task_loss": 0.08978745341300964 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02151617407798767, "epoch": 9.94, "learning_rate": 3.126697879439541e-05, "loss": 0.0205, "step": 10467, "task_loss": 0.011844877153635025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01606358028948307, "epoch": 9.94, "learning_rate": 3.1256663199341764e-05, "loss": 0.0152, "step": 10468, "task_loss": 0.00763893686234951 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030537568032741547, "epoch": 9.94, "learning_rate": 3.124634646773511e-05, "loss": 0.0283, "step": 10469, "task_loss": 0.007955452427268028 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02014528028666973, "epoch": 9.94, "learning_rate": 3.1236028601449534e-05, "loss": 0.0195, "step": 10470, "task_loss": 0.013426566496491432 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02128102444112301, "epoch": 9.94, "learning_rate": 3.1225709602359335e-05, "loss": 0.0285, "step": 10471, "task_loss": 0.09380625188350677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016784438863396645, "epoch": 9.94, "learning_rate": 3.1215389472338995e-05, "loss": 0.0157, "step": 10472, "task_loss": 0.0061706434935331345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04040418565273285, "epoch": 9.95, "learning_rate": 3.1205068213263234e-05, "loss": 0.043, "step": 10473, "task_loss": 0.06682712584733963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012193303555250168, "epoch": 9.95, "learning_rate": 3.119474582700695e-05, "loss": 0.0168, "step": 10474, "task_loss": 0.05839819461107254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06726071983575821, "epoch": 9.95, "learning_rate": 3.118442231544524e-05, "loss": 0.071, "step": 10475, "task_loss": 0.10484815388917923 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026226460933685303, "epoch": 9.95, "learning_rate": 3.117409768045344e-05, "loss": 0.0296, "step": 10476, "task_loss": 0.06030692532658577 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011876748874783516, "epoch": 9.95, "learning_rate": 3.116377192390706e-05, "loss": 0.0113, "step": 10477, "task_loss": 0.006212221458554268 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08747504651546478, "epoch": 9.95, "learning_rate": 3.115344504768183e-05, "loss": 0.0951, "step": 10478, "task_loss": 0.16354155540466309 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07612023502588272, "epoch": 9.95, "learning_rate": 3.1143117053653665e-05, "loss": 0.0816, "step": 10479, "task_loss": 0.1306367814540863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02969006821513176, "epoch": 9.95, "learning_rate": 3.113278794369869e-05, "loss": 0.0357, "step": 10480, "task_loss": 0.08966681361198425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08898676186800003, "epoch": 9.95, "learning_rate": 3.112245771969327e-05, "loss": 0.0848, "step": 10481, "task_loss": 0.04718249663710594 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11488527804613113, "epoch": 9.95, "learning_rate": 3.1112126383513914e-05, "loss": 0.1121, "step": 10482, "task_loss": 0.08729420602321625 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.048652954399585724, "epoch": 9.96, "learning_rate": 3.110179393703737e-05, "loss": 0.0742, "step": 10483, "task_loss": 0.30367979407310486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019496137276291847, "epoch": 9.96, "learning_rate": 3.109146038214055e-05, "loss": 0.0269, "step": 10484, "task_loss": 0.09309974312782288 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01703246682882309, "epoch": 9.96, "learning_rate": 3.108112572070063e-05, "loss": 0.0197, "step": 10485, "task_loss": 0.043584369122982025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05471110716462135, "epoch": 9.96, "learning_rate": 3.1070789954594934e-05, "loss": 0.0517, "step": 10486, "task_loss": 0.024789560586214066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02847129851579666, "epoch": 9.96, "learning_rate": 3.1060453085701e-05, "loss": 0.0314, "step": 10487, "task_loss": 0.05726098641753197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018717454746365547, "epoch": 9.96, "learning_rate": 3.105011511589658e-05, "loss": 0.0232, "step": 10488, "task_loss": 0.06356090307235718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030354956164956093, "epoch": 9.96, "learning_rate": 3.103977604705961e-05, "loss": 0.0287, "step": 10489, "task_loss": 0.013773368671536446 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05581222474575043, "epoch": 9.96, "learning_rate": 3.102943588106824e-05, "loss": 0.053, "step": 10490, "task_loss": 0.027538809925317764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021054701879620552, "epoch": 9.96, "learning_rate": 3.10190946198008e-05, "loss": 0.0211, "step": 10491, "task_loss": 0.021210404112935066 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041227295994758606, "epoch": 9.96, "learning_rate": 3.100875226513583e-05, "loss": 0.0386, "step": 10492, "task_loss": 0.015084227547049522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03447715565562248, "epoch": 9.96, "learning_rate": 3.099840881895208e-05, "loss": 0.0406, "step": 10493, "task_loss": 0.09537314623594284 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05250038951635361, "epoch": 9.97, "learning_rate": 3.098806428312847e-05, "loss": 0.0565, "step": 10494, "task_loss": 0.09219056367874146 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0351855605840683, "epoch": 9.97, "learning_rate": 3.097771865954415e-05, "loss": 0.036, "step": 10495, "task_loss": 0.04331498593091965 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0283675417304039, "epoch": 9.97, "learning_rate": 3.096737195007845e-05, "loss": 0.0341, "step": 10496, "task_loss": 0.08616603910923004 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029870422556996346, "epoch": 9.97, "learning_rate": 3.0957024156610884e-05, "loss": 0.038, "step": 10497, "task_loss": 0.11162212491035461 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012384091503918171, "epoch": 9.97, "learning_rate": 3.09466752810212e-05, "loss": 0.0158, "step": 10498, "task_loss": 0.04627315700054169 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013409021310508251, "epoch": 9.97, "learning_rate": 3.093632532518931e-05, "loss": 0.0155, "step": 10499, "task_loss": 0.03382343798875809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010947839356958866, "epoch": 9.97, "learning_rate": 3.092597429099534e-05, "loss": 0.0137, "step": 10500, "task_loss": 0.03889209032058716 }, { "epoch": 9.97, "eval_accuracy": 0.8956422018348624, "eval_loss": 0.4390338361263275, "eval_runtime": 18.2957, "eval_samples_per_second": 47.661, "eval_steps_per_second": 5.958, "step": 10500 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02746719866991043, "epoch": 9.97, "learning_rate": 3.0915622180319585e-05, "loss": 0.0316, "step": 10501, "task_loss": 0.06869719922542572 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039203185588121414, "epoch": 9.97, "learning_rate": 3.090526899504259e-05, "loss": 0.0372, "step": 10502, "task_loss": 0.019136063754558563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025588029995560646, "epoch": 9.97, "learning_rate": 3.0894914737045034e-05, "loss": 0.0305, "step": 10503, "task_loss": 0.07455385476350784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030900314450263977, "epoch": 9.98, "learning_rate": 3.088455940820782e-05, "loss": 0.0378, "step": 10504, "task_loss": 0.09984858334064484 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059486109763383865, "epoch": 9.98, "learning_rate": 3.087420301041206e-05, "loss": 0.0644, "step": 10505, "task_loss": 0.10867089033126831 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012473450042307377, "epoch": 9.98, "learning_rate": 3.086384554553902e-05, "loss": 0.0157, "step": 10506, "task_loss": 0.044721417129039764 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037564076483249664, "epoch": 9.98, "learning_rate": 3.0853487015470206e-05, "loss": 0.0342, "step": 10507, "task_loss": 0.00427047535777092 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.21402209997177124, "epoch": 9.98, "learning_rate": 3.084312742208728e-05, "loss": 0.2225, "step": 10508, "task_loss": 0.2992434501647949 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05252481997013092, "epoch": 9.98, "learning_rate": 3.083276676727212e-05, "loss": 0.0562, "step": 10509, "task_loss": 0.08937288820743561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02034352719783783, "epoch": 9.98, "learning_rate": 3.082240505290678e-05, "loss": 0.0345, "step": 10510, "task_loss": 0.16238275170326233 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02308393456041813, "epoch": 9.98, "learning_rate": 3.081204228087353e-05, "loss": 0.0238, "step": 10511, "task_loss": 0.02994043007493019 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022067924961447716, "epoch": 9.98, "learning_rate": 3.08016784530548e-05, "loss": 0.0206, "step": 10512, "task_loss": 0.007659193128347397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017336195334792137, "epoch": 9.98, "learning_rate": 3.0791313571333244e-05, "loss": 0.0188, "step": 10513, "task_loss": 0.031775638461112976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04067476838827133, "epoch": 9.98, "learning_rate": 3.078094763759168e-05, "loss": 0.0461, "step": 10514, "task_loss": 0.09492932260036469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01405811682343483, "epoch": 9.99, "learning_rate": 3.0770580653713146e-05, "loss": 0.0137, "step": 10515, "task_loss": 0.010203549638390541 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023571021854877472, "epoch": 9.99, "learning_rate": 3.076021262158084e-05, "loss": 0.038, "step": 10516, "task_loss": 0.1676616072654724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019903654232621193, "epoch": 9.99, "learning_rate": 3.074984354307817e-05, "loss": 0.0192, "step": 10517, "task_loss": 0.012664202600717545 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0405997633934021, "epoch": 9.99, "learning_rate": 3.073947342008873e-05, "loss": 0.0409, "step": 10518, "task_loss": 0.04333402216434479 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022368015721440315, "epoch": 9.99, "learning_rate": 3.07291022544963e-05, "loss": 0.0232, "step": 10519, "task_loss": 0.030709004029631615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013003375381231308, "epoch": 9.99, "learning_rate": 3.0718730048184855e-05, "loss": 0.0123, "step": 10520, "task_loss": 0.0056927260011434555 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021634429693222046, "epoch": 9.99, "learning_rate": 3.0708356803038556e-05, "loss": 0.0208, "step": 10521, "task_loss": 0.01319202035665512 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026351455599069595, "epoch": 9.99, "learning_rate": 3.069798252094175e-05, "loss": 0.0246, "step": 10522, "task_loss": 0.008785083889961243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08953380584716797, "epoch": 9.99, "learning_rate": 3.068760720377897e-05, "loss": 0.0882, "step": 10523, "task_loss": 0.07605984061956406 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01611064001917839, "epoch": 9.99, "learning_rate": 3.067723085343496e-05, "loss": 0.0154, "step": 10524, "task_loss": 0.008628584444522858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03214561194181442, "epoch": 10.0, "learning_rate": 3.066685347179462e-05, "loss": 0.0427, "step": 10525, "task_loss": 0.13809731602668762 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017522085458040237, "epoch": 10.0, "learning_rate": 3.065647506074306e-05, "loss": 0.0165, "step": 10526, "task_loss": 0.007697628811001778 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03798111528158188, "epoch": 10.0, "learning_rate": 3.064609562216555e-05, "loss": 0.0371, "step": 10527, "task_loss": 0.028712373226881027 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08762382715940475, "epoch": 10.0, "learning_rate": 3.063571515794759e-05, "loss": 0.09, "step": 10528, "task_loss": 0.11169522255659103 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.060012731701135635, "epoch": 10.0, "learning_rate": 3.062533366997483e-05, "loss": 0.0631, "step": 10529, "task_loss": 0.09081701934337616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06982729583978653, "epoch": 10.0, "learning_rate": 3.061495116013311e-05, "loss": 0.0848, "step": 10530, "task_loss": 0.21950267255306244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015027199871838093, "epoch": 10.0, "learning_rate": 3.060456763030847e-05, "loss": 0.0151, "step": 10531, "task_loss": 0.015308814123272896 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010798173025250435, "epoch": 10.0, "learning_rate": 3.059418308238713e-05, "loss": 0.011, "step": 10532, "task_loss": 0.012500785291194916 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03574404492974281, "epoch": 10.0, "learning_rate": 3.0583797518255505e-05, "loss": 0.0326, "step": 10533, "task_loss": 0.0042223744094371796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021271761506795883, "epoch": 10.0, "learning_rate": 3.057341093980015e-05, "loss": 0.0316, "step": 10534, "task_loss": 0.12449042499065399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11009443551301956, "epoch": 10.0, "learning_rate": 3.056302334890786e-05, "loss": 0.1068, "step": 10535, "task_loss": 0.07683957368135452 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018123319372534752, "epoch": 10.01, "learning_rate": 3.055263474746559e-05, "loss": 0.0185, "step": 10536, "task_loss": 0.021713176742196083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016921810805797577, "epoch": 10.01, "learning_rate": 3.054224513736048e-05, "loss": 0.0374, "step": 10537, "task_loss": 0.2221601903438568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017352506518363953, "epoch": 10.01, "learning_rate": 3.0531854520479844e-05, "loss": 0.0216, "step": 10538, "task_loss": 0.05965179204940796 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04468035697937012, "epoch": 10.01, "learning_rate": 3.05214628987112e-05, "loss": 0.0476, "step": 10539, "task_loss": 0.07353688031435013 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015710055828094482, "epoch": 10.01, "learning_rate": 3.0511070273942217e-05, "loss": 0.0148, "step": 10540, "task_loss": 0.006397552788257599 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014855926856398582, "epoch": 10.01, "learning_rate": 3.0500676648060776e-05, "loss": 0.0181, "step": 10541, "task_loss": 0.046922795474529266 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019756915047764778, "epoch": 10.01, "learning_rate": 3.049028202295494e-05, "loss": 0.0209, "step": 10542, "task_loss": 0.03078971616923809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014726649969816208, "epoch": 10.01, "learning_rate": 3.0479886400512937e-05, "loss": 0.0273, "step": 10543, "task_loss": 0.14058496057987213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030262373387813568, "epoch": 10.01, "learning_rate": 3.0469489782623163e-05, "loss": 0.029, "step": 10544, "task_loss": 0.01749301515519619 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024717876687645912, "epoch": 10.01, "learning_rate": 3.045909217117424e-05, "loss": 0.0291, "step": 10545, "task_loss": 0.06830822676420212 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010806870646774769, "epoch": 10.02, "learning_rate": 3.0448693568054924e-05, "loss": 0.0103, "step": 10546, "task_loss": 0.005712360143661499 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022307565435767174, "epoch": 10.02, "learning_rate": 3.0438293975154186e-05, "loss": 0.0247, "step": 10547, "task_loss": 0.04641730338335037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031023293733596802, "epoch": 10.02, "learning_rate": 3.042789339436116e-05, "loss": 0.0311, "step": 10548, "task_loss": 0.031546495854854584 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02841011807322502, "epoch": 10.02, "learning_rate": 3.041749182756515e-05, "loss": 0.0366, "step": 10549, "task_loss": 0.11078496277332306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03763877600431442, "epoch": 10.02, "learning_rate": 3.0407089276655664e-05, "loss": 0.0385, "step": 10550, "task_loss": 0.04576456546783447 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01621590554714203, "epoch": 10.02, "learning_rate": 3.039668574352237e-05, "loss": 0.027, "step": 10551, "task_loss": 0.12407008558511734 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018488582223653793, "epoch": 10.02, "learning_rate": 3.0386281230055113e-05, "loss": 0.0176, "step": 10552, "task_loss": 0.009799566119909286 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012251723557710648, "epoch": 10.02, "learning_rate": 3.0375875738143938e-05, "loss": 0.0197, "step": 10553, "task_loss": 0.08636859059333801 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019306058064103127, "epoch": 10.02, "learning_rate": 3.0365469269679042e-05, "loss": 0.0194, "step": 10554, "task_loss": 0.020166553556919098 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022004347294569016, "epoch": 10.02, "learning_rate": 3.0355061826550813e-05, "loss": 0.0204, "step": 10555, "task_loss": 0.005572935566306114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010447701439261436, "epoch": 10.02, "learning_rate": 3.0344653410649815e-05, "loss": 0.0171, "step": 10556, "task_loss": 0.07670579850673676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023317281156778336, "epoch": 10.03, "learning_rate": 3.033424402386678e-05, "loss": 0.0288, "step": 10557, "task_loss": 0.07775168120861053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01650930941104889, "epoch": 10.03, "learning_rate": 3.032383366809263e-05, "loss": 0.0173, "step": 10558, "task_loss": 0.02475823275744915 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016953203827142715, "epoch": 10.03, "learning_rate": 3.031342234521845e-05, "loss": 0.016, "step": 10559, "task_loss": 0.007815249264240265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.045376237481832504, "epoch": 10.03, "learning_rate": 3.030301005713552e-05, "loss": 0.0476, "step": 10560, "task_loss": 0.06719513982534409 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010588767006993294, "epoch": 10.03, "learning_rate": 3.0292596805735274e-05, "loss": 0.0157, "step": 10561, "task_loss": 0.06145908683538437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013043254613876343, "epoch": 10.03, "learning_rate": 3.028218259290932e-05, "loss": 0.0121, "step": 10562, "task_loss": 0.003523891791701317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030626816675066948, "epoch": 10.03, "learning_rate": 3.0271767420549463e-05, "loss": 0.0305, "step": 10563, "task_loss": 0.02899116836488247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015171288512647152, "epoch": 10.03, "learning_rate": 3.0261351290547667e-05, "loss": 0.019, "step": 10564, "task_loss": 0.053528182208538055 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011217884719371796, "epoch": 10.03, "learning_rate": 3.025093420479607e-05, "loss": 0.0113, "step": 10565, "task_loss": 0.012106716632843018 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.055433209985494614, "epoch": 10.03, "learning_rate": 3.0240516165186976e-05, "loss": 0.055, "step": 10566, "task_loss": 0.05153985321521759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020901085808873177, "epoch": 10.04, "learning_rate": 3.0230097173612896e-05, "loss": 0.0282, "step": 10567, "task_loss": 0.09422680735588074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00867534801363945, "epoch": 10.04, "learning_rate": 3.021967723196647e-05, "loss": 0.0227, "step": 10568, "task_loss": 0.1493276208639145 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02514798566699028, "epoch": 10.04, "learning_rate": 3.020925634214054e-05, "loss": 0.0274, "step": 10569, "task_loss": 0.04775901138782501 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010010787285864353, "epoch": 10.04, "learning_rate": 3.01988345060281e-05, "loss": 0.0094, "step": 10570, "task_loss": 0.0035102032124996185 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03369935229420662, "epoch": 10.04, "learning_rate": 3.018841172552234e-05, "loss": 0.033, "step": 10571, "task_loss": 0.026725368574261665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06951646506786346, "epoch": 10.04, "learning_rate": 3.01779880025166e-05, "loss": 0.073, "step": 10572, "task_loss": 0.10399917513132095 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016049357131123543, "epoch": 10.04, "learning_rate": 3.0167563338904402e-05, "loss": 0.0153, "step": 10573, "task_loss": 0.008852284401655197 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024919727817177773, "epoch": 10.04, "learning_rate": 3.0157137736579445e-05, "loss": 0.024, "step": 10574, "task_loss": 0.01523817703127861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01857706718146801, "epoch": 10.04, "learning_rate": 3.014671119743556e-05, "loss": 0.0173, "step": 10575, "task_loss": 0.0053652990609407425 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016847968101501465, "epoch": 10.04, "learning_rate": 3.013628372336682e-05, "loss": 0.0155, "step": 10576, "task_loss": 0.0033319760113954544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018795382231473923, "epoch": 10.04, "learning_rate": 3.0125855316267394e-05, "loss": 0.0242, "step": 10577, "task_loss": 0.07253532111644745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06431741267442703, "epoch": 10.05, "learning_rate": 3.0115425978031663e-05, "loss": 0.0702, "step": 10578, "task_loss": 0.12317221611738205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047512613236904144, "epoch": 10.05, "learning_rate": 3.0104995710554174e-05, "loss": 0.0457, "step": 10579, "task_loss": 0.02974691055715084 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07968870550394058, "epoch": 10.05, "learning_rate": 3.0094564515729623e-05, "loss": 0.0936, "step": 10580, "task_loss": 0.21833162009716034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008304203860461712, "epoch": 10.05, "learning_rate": 3.0084132395452896e-05, "loss": 0.0079, "step": 10581, "task_loss": 0.004243377596139908 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016128698363900185, "epoch": 10.05, "learning_rate": 3.0073699351619033e-05, "loss": 0.0175, "step": 10582, "task_loss": 0.03016304410994053 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01745760627090931, "epoch": 10.05, "learning_rate": 3.0063265386123247e-05, "loss": 0.0251, "step": 10583, "task_loss": 0.09426887333393097 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023326557129621506, "epoch": 10.05, "learning_rate": 3.0052830500860912e-05, "loss": 0.0215, "step": 10584, "task_loss": 0.004915602505207062 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04375822842121124, "epoch": 10.05, "learning_rate": 3.0042394697727587e-05, "loss": 0.0401, "step": 10585, "task_loss": 0.007045827805995941 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012805687263607979, "epoch": 10.05, "learning_rate": 3.0031957978618986e-05, "loss": 0.012, "step": 10586, "task_loss": 0.005244376137852669 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01600104570388794, "epoch": 10.05, "learning_rate": 3.002152034543098e-05, "loss": 0.015, "step": 10587, "task_loss": 0.005925571545958519 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008856108412146568, "epoch": 10.06, "learning_rate": 3.0011081800059616e-05, "loss": 0.0084, "step": 10588, "task_loss": 0.003961298614740372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02766408771276474, "epoch": 10.06, "learning_rate": 3.0000642344401113e-05, "loss": 0.0406, "step": 10589, "task_loss": 0.15749169886112213 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014702584594488144, "epoch": 10.06, "learning_rate": 2.999020198035184e-05, "loss": 0.0137, "step": 10590, "task_loss": 0.004804585129022598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013725142925977707, "epoch": 10.06, "learning_rate": 2.997976070980836e-05, "loss": 0.0143, "step": 10591, "task_loss": 0.019009843468666077 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027275511994957924, "epoch": 10.06, "learning_rate": 2.996931853466734e-05, "loss": 0.0451, "step": 10592, "task_loss": 0.20579108595848083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009938783943653107, "epoch": 10.06, "learning_rate": 2.9958875456825692e-05, "loss": 0.0093, "step": 10593, "task_loss": 0.003395296633243561 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00878879614174366, "epoch": 10.06, "learning_rate": 2.9948431478180434e-05, "loss": 0.01, "step": 10594, "task_loss": 0.021191343665122986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017414798960089684, "epoch": 10.06, "learning_rate": 2.9937986600628758e-05, "loss": 0.0165, "step": 10595, "task_loss": 0.008245648816227913 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008904863148927689, "epoch": 10.06, "learning_rate": 2.992754082606804e-05, "loss": 0.0085, "step": 10596, "task_loss": 0.004817705601453781 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05227641388773918, "epoch": 10.06, "learning_rate": 2.9917094156395796e-05, "loss": 0.0602, "step": 10597, "task_loss": 0.1313791424036026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016239894554018974, "epoch": 10.06, "learning_rate": 2.990664659350973e-05, "loss": 0.0153, "step": 10598, "task_loss": 0.006773691624403 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028258735314011574, "epoch": 10.07, "learning_rate": 2.9896198139307668e-05, "loss": 0.0283, "step": 10599, "task_loss": 0.028914695605635643 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027807846665382385, "epoch": 10.07, "learning_rate": 2.9885748795687642e-05, "loss": 0.0382, "step": 10600, "task_loss": 0.1314897984266281 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07431833446025848, "epoch": 10.07, "learning_rate": 2.9875298564547805e-05, "loss": 0.0764, "step": 10601, "task_loss": 0.09555675089359283 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03642863780260086, "epoch": 10.07, "learning_rate": 2.9864847447786503e-05, "loss": 0.0349, "step": 10602, "task_loss": 0.021108128130435944 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030721113085746765, "epoch": 10.07, "learning_rate": 2.9854395447302246e-05, "loss": 0.0358, "step": 10603, "task_loss": 0.08126336336135864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03847306966781616, "epoch": 10.07, "learning_rate": 2.9843942564993672e-05, "loss": 0.0412, "step": 10604, "task_loss": 0.06578797101974487 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007311783730983734, "epoch": 10.07, "learning_rate": 2.98334888027596e-05, "loss": 0.007, "step": 10605, "task_loss": 0.004128355532884598 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017765909433364868, "epoch": 10.07, "learning_rate": 2.9823034162499007e-05, "loss": 0.0178, "step": 10606, "task_loss": 0.017818637192249298 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008372337557375431, "epoch": 10.07, "learning_rate": 2.981257864611104e-05, "loss": 0.0081, "step": 10607, "task_loss": 0.0059828683733940125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0075715347193181515, "epoch": 10.07, "learning_rate": 2.980212225549498e-05, "loss": 0.0072, "step": 10608, "task_loss": 0.0034196972846984863 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015017621219158173, "epoch": 10.08, "learning_rate": 2.9791664992550273e-05, "loss": 0.016, "step": 10609, "task_loss": 0.024808872491121292 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03149663656949997, "epoch": 10.08, "learning_rate": 2.978120685917656e-05, "loss": 0.0478, "step": 10610, "task_loss": 0.19493995606899261 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018071962520480156, "epoch": 10.08, "learning_rate": 2.9770747857273584e-05, "loss": 0.0249, "step": 10611, "task_loss": 0.08605194836854935 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020836105570197105, "epoch": 10.08, "learning_rate": 2.9760287988741293e-05, "loss": 0.0341, "step": 10612, "task_loss": 0.15324726700782776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.11783700436353683, "epoch": 10.08, "learning_rate": 2.9749827255479755e-05, "loss": 0.1251, "step": 10613, "task_loss": 0.19090542197227478 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05260676145553589, "epoch": 10.08, "learning_rate": 2.9739365659389223e-05, "loss": 0.0506, "step": 10614, "task_loss": 0.03276871144771576 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020378753542900085, "epoch": 10.08, "learning_rate": 2.972890320237009e-05, "loss": 0.0242, "step": 10615, "task_loss": 0.05864740163087845 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027730267494916916, "epoch": 10.08, "learning_rate": 2.971843988632292e-05, "loss": 0.0394, "step": 10616, "task_loss": 0.14460690319538116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009068669751286507, "epoch": 10.08, "learning_rate": 2.970797571314842e-05, "loss": 0.0085, "step": 10617, "task_loss": 0.0035065151751041412 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014962357468903065, "epoch": 10.08, "learning_rate": 2.9697510684747454e-05, "loss": 0.0271, "step": 10618, "task_loss": 0.13601535558700562 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0636109784245491, "epoch": 10.08, "learning_rate": 2.9687044803021057e-05, "loss": 0.0619, "step": 10619, "task_loss": 0.046097345650196075 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01265124510973692, "epoch": 10.09, "learning_rate": 2.9676578069870392e-05, "loss": 0.0118, "step": 10620, "task_loss": 0.004545770585536957 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02781161107122898, "epoch": 10.09, "learning_rate": 2.9666110487196798e-05, "loss": 0.0261, "step": 10621, "task_loss": 0.011152038350701332 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03320011496543884, "epoch": 10.09, "learning_rate": 2.9655642056901762e-05, "loss": 0.0364, "step": 10622, "task_loss": 0.06528480350971222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08747844398021698, "epoch": 10.09, "learning_rate": 2.9645172780886927e-05, "loss": 0.092, "step": 10623, "task_loss": 0.13289803266525269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024324875324964523, "epoch": 10.09, "learning_rate": 2.9634702661054085e-05, "loss": 0.023, "step": 10624, "task_loss": 0.011415783315896988 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009565697982907295, "epoch": 10.09, "learning_rate": 2.962423169930518e-05, "loss": 0.0089, "step": 10625, "task_loss": 0.003159165382385254 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013625957071781158, "epoch": 10.09, "learning_rate": 2.961375989754232e-05, "loss": 0.0128, "step": 10626, "task_loss": 0.0050227586179971695 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05235249549150467, "epoch": 10.09, "learning_rate": 2.9603287257667754e-05, "loss": 0.0643, "step": 10627, "task_loss": 0.17228303849697113 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021474594250321388, "epoch": 10.09, "learning_rate": 2.9592813781583885e-05, "loss": 0.02, "step": 10628, "task_loss": 0.007135756313800812 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027554640546441078, "epoch": 10.09, "learning_rate": 2.958233947119328e-05, "loss": 0.0254, "step": 10629, "task_loss": 0.005915815010666847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06590898334980011, "epoch": 10.09, "learning_rate": 2.9571864328398636e-05, "loss": 0.0754, "step": 10630, "task_loss": 0.1609448492527008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04161786288022995, "epoch": 10.1, "learning_rate": 2.956138835510282e-05, "loss": 0.0464, "step": 10631, "task_loss": 0.08935808390378952 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01830127462744713, "epoch": 10.1, "learning_rate": 2.9550911553208838e-05, "loss": 0.02, "step": 10632, "task_loss": 0.034862592816352844 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0464775525033474, "epoch": 10.1, "learning_rate": 2.954043392461986e-05, "loss": 0.0431, "step": 10633, "task_loss": 0.01255410723388195 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021407488733530045, "epoch": 10.1, "learning_rate": 2.952995547123919e-05, "loss": 0.0287, "step": 10634, "task_loss": 0.09397697448730469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013649847358465195, "epoch": 10.1, "learning_rate": 2.9519476194970286e-05, "loss": 0.0128, "step": 10635, "task_loss": 0.005092758685350418 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03821259364485741, "epoch": 10.1, "learning_rate": 2.9508996097716777e-05, "loss": 0.0355, "step": 10636, "task_loss": 0.011517934501171112 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0182490237057209, "epoch": 10.1, "learning_rate": 2.949851518138241e-05, "loss": 0.0306, "step": 10637, "task_loss": 0.14180639386177063 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031840305775403976, "epoch": 10.1, "learning_rate": 2.948803344787109e-05, "loss": 0.0356, "step": 10638, "task_loss": 0.06947970390319824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01208855863660574, "epoch": 10.1, "learning_rate": 2.947755089908688e-05, "loss": 0.0287, "step": 10639, "task_loss": 0.17781664431095123 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01693141832947731, "epoch": 10.1, "learning_rate": 2.946706753693398e-05, "loss": 0.0278, "step": 10640, "task_loss": 0.1261083036661148 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014154396951198578, "epoch": 10.11, "learning_rate": 2.945658336331676e-05, "loss": 0.0231, "step": 10641, "task_loss": 0.10346969962120056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0599503219127655, "epoch": 10.11, "learning_rate": 2.9446098380139703e-05, "loss": 0.0846, "step": 10642, "task_loss": 0.30606013536453247 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02652948535978794, "epoch": 10.11, "learning_rate": 2.9435612589307458e-05, "loss": 0.0424, "step": 10643, "task_loss": 0.18499121069908142 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028400473296642303, "epoch": 10.11, "learning_rate": 2.942512599272483e-05, "loss": 0.0444, "step": 10644, "task_loss": 0.18858596682548523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02752113714814186, "epoch": 10.11, "learning_rate": 2.9414638592296752e-05, "loss": 0.031, "step": 10645, "task_loss": 0.06193367764353752 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02233045920729637, "epoch": 10.11, "learning_rate": 2.9404150389928316e-05, "loss": 0.039, "step": 10646, "task_loss": 0.18857558071613312 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012071599252521992, "epoch": 10.11, "learning_rate": 2.9393661387524745e-05, "loss": 0.0204, "step": 10647, "task_loss": 0.0955902636051178 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009622898884117603, "epoch": 10.11, "learning_rate": 2.9383171586991424e-05, "loss": 0.0171, "step": 10648, "task_loss": 0.08440146595239639 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011028185486793518, "epoch": 10.11, "learning_rate": 2.9372680990233875e-05, "loss": 0.0179, "step": 10649, "task_loss": 0.07927846908569336 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0299701951444149, "epoch": 10.11, "learning_rate": 2.9362189599157776e-05, "loss": 0.038, "step": 10650, "task_loss": 0.10991066694259644 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01364204753190279, "epoch": 10.11, "learning_rate": 2.9351697415668917e-05, "loss": 0.0286, "step": 10651, "task_loss": 0.16349458694458008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014143523760139942, "epoch": 10.12, "learning_rate": 2.9341204441673266e-05, "loss": 0.0235, "step": 10652, "task_loss": 0.10781119763851166 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0211515910923481, "epoch": 10.12, "learning_rate": 2.9330710679076916e-05, "loss": 0.0267, "step": 10653, "task_loss": 0.07670915871858597 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011677171103656292, "epoch": 10.12, "learning_rate": 2.9320216129786116e-05, "loss": 0.0113, "step": 10654, "task_loss": 0.007633142173290253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03487570211291313, "epoch": 10.12, "learning_rate": 2.9309720795707257e-05, "loss": 0.0425, "step": 10655, "task_loss": 0.11129167675971985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014407580718398094, "epoch": 10.12, "learning_rate": 2.9299224678746855e-05, "loss": 0.0138, "step": 10656, "task_loss": 0.008088191971182823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014523474499583244, "epoch": 10.12, "learning_rate": 2.928872778081158e-05, "loss": 0.0136, "step": 10657, "task_loss": 0.005704553797841072 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05787359178066254, "epoch": 10.12, "learning_rate": 2.9278230103808257e-05, "loss": 0.0582, "step": 10658, "task_loss": 0.061048611998558044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020860906690359116, "epoch": 10.12, "learning_rate": 2.9267731649643827e-05, "loss": 0.0373, "step": 10659, "task_loss": 0.18554821610450745 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015227505937218666, "epoch": 10.12, "learning_rate": 2.9257232420225394e-05, "loss": 0.0218, "step": 10660, "task_loss": 0.08097986876964569 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013915725983679295, "epoch": 10.12, "learning_rate": 2.9246732417460178e-05, "loss": 0.0131, "step": 10661, "task_loss": 0.005335215479135513 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015429834835231304, "epoch": 10.13, "learning_rate": 2.9236231643255578e-05, "loss": 0.0209, "step": 10662, "task_loss": 0.07019021362066269 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016493186354637146, "epoch": 10.13, "learning_rate": 2.922573009951909e-05, "loss": 0.0259, "step": 10663, "task_loss": 0.11039119958877563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016565848141908646, "epoch": 10.13, "learning_rate": 2.9215227788158382e-05, "loss": 0.0183, "step": 10664, "task_loss": 0.03420030698180199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021225640550255775, "epoch": 10.13, "learning_rate": 2.920472471108125e-05, "loss": 0.0269, "step": 10665, "task_loss": 0.07766622304916382 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06270953267812729, "epoch": 10.13, "learning_rate": 2.919422087019561e-05, "loss": 0.0811, "step": 10666, "task_loss": 0.24679216742515564 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00949104130268097, "epoch": 10.13, "learning_rate": 2.9183716267409562e-05, "loss": 0.0097, "step": 10667, "task_loss": 0.011834444478154182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016700657084584236, "epoch": 10.13, "learning_rate": 2.9173210904631297e-05, "loss": 0.0192, "step": 10668, "task_loss": 0.04135492444038391 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019157694652676582, "epoch": 10.13, "learning_rate": 2.916270478376918e-05, "loss": 0.018, "step": 10669, "task_loss": 0.00787227414548397 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015583924949169159, "epoch": 10.13, "learning_rate": 2.9152197906731687e-05, "loss": 0.0191, "step": 10670, "task_loss": 0.05046524479985237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0134699996560812, "epoch": 10.13, "learning_rate": 2.9141690275427445e-05, "loss": 0.0137, "step": 10671, "task_loss": 0.015451043844223022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023017989471554756, "epoch": 10.13, "learning_rate": 2.9131181891765226e-05, "loss": 0.0218, "step": 10672, "task_loss": 0.011321371421217918 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.045944422483444214, "epoch": 10.14, "learning_rate": 2.9120672757653916e-05, "loss": 0.0454, "step": 10673, "task_loss": 0.04038790985941887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05254145711660385, "epoch": 10.14, "learning_rate": 2.9110162875002552e-05, "loss": 0.0481, "step": 10674, "task_loss": 0.0084177665412426 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028896521776914597, "epoch": 10.14, "learning_rate": 2.909965224572031e-05, "loss": 0.0289, "step": 10675, "task_loss": 0.02889561466872692 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03066609799861908, "epoch": 10.14, "learning_rate": 2.9089140871716492e-05, "loss": 0.0331, "step": 10676, "task_loss": 0.05538489297032356 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011451882310211658, "epoch": 10.14, "learning_rate": 2.9078628754900543e-05, "loss": 0.019, "step": 10677, "task_loss": 0.08714932203292847 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01488979160785675, "epoch": 10.14, "learning_rate": 2.9068115897182036e-05, "loss": 0.0221, "step": 10678, "task_loss": 0.08699451386928558 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00719775166362524, "epoch": 10.14, "learning_rate": 2.905760230047068e-05, "loss": 0.0132, "step": 10679, "task_loss": 0.06723162531852722 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011093568056821823, "epoch": 10.14, "learning_rate": 2.9047087966676327e-05, "loss": 0.0141, "step": 10680, "task_loss": 0.040885455906391144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03617207705974579, "epoch": 10.14, "learning_rate": 2.903657289770896e-05, "loss": 0.0479, "step": 10681, "task_loss": 0.15319961309432983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018763557076454163, "epoch": 10.14, "learning_rate": 2.902605709547868e-05, "loss": 0.0249, "step": 10682, "task_loss": 0.08051219582557678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016470609232783318, "epoch": 10.15, "learning_rate": 2.9015540561895738e-05, "loss": 0.0278, "step": 10683, "task_loss": 0.12958627939224243 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10346265137195587, "epoch": 10.15, "learning_rate": 2.9005023298870514e-05, "loss": 0.1048, "step": 10684, "task_loss": 0.11723846197128296 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.064060278236866, "epoch": 10.15, "learning_rate": 2.8994505308313523e-05, "loss": 0.0662, "step": 10685, "task_loss": 0.08514577150344849 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02188374474644661, "epoch": 10.15, "learning_rate": 2.8983986592135404e-05, "loss": 0.025, "step": 10686, "task_loss": 0.05310831218957901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020624695345759392, "epoch": 10.15, "learning_rate": 2.897346715224693e-05, "loss": 0.0241, "step": 10687, "task_loss": 0.055748291313648224 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06058871001005173, "epoch": 10.15, "learning_rate": 2.8962946990559013e-05, "loss": 0.0679, "step": 10688, "task_loss": 0.13335812091827393 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009769352152943611, "epoch": 10.15, "learning_rate": 2.8952426108982693e-05, "loss": 0.0172, "step": 10689, "task_loss": 0.08371652662754059 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016979005187749863, "epoch": 10.15, "learning_rate": 2.8941904509429134e-05, "loss": 0.0216, "step": 10690, "task_loss": 0.06295529007911682 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020230475813150406, "epoch": 10.15, "learning_rate": 2.8931382193809635e-05, "loss": 0.0353, "step": 10691, "task_loss": 0.1709280014038086 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.041637878865003586, "epoch": 10.15, "learning_rate": 2.8920859164035625e-05, "loss": 0.0392, "step": 10692, "task_loss": 0.017362238839268684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04988161846995354, "epoch": 10.15, "learning_rate": 2.8910335422018664e-05, "loss": 0.048, "step": 10693, "task_loss": 0.031166965141892433 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021832330152392387, "epoch": 10.16, "learning_rate": 2.8899810969670448e-05, "loss": 0.0259, "step": 10694, "task_loss": 0.06244866922497749 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.033625852316617966, "epoch": 10.16, "learning_rate": 2.8889285808902784e-05, "loss": 0.0352, "step": 10695, "task_loss": 0.04985510930418968 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01881205290555954, "epoch": 10.16, "learning_rate": 2.887875994162762e-05, "loss": 0.0177, "step": 10696, "task_loss": 0.007423171773552895 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031309157609939575, "epoch": 10.16, "learning_rate": 2.886823336975703e-05, "loss": 0.0379, "step": 10697, "task_loss": 0.09693950414657593 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02167995274066925, "epoch": 10.16, "learning_rate": 2.885770609520323e-05, "loss": 0.025, "step": 10698, "task_loss": 0.055332060903310776 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020276514813303947, "epoch": 10.16, "learning_rate": 2.8847178119878527e-05, "loss": 0.0265, "step": 10699, "task_loss": 0.08290567249059677 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029720699414610863, "epoch": 10.16, "learning_rate": 2.883664944569539e-05, "loss": 0.0503, "step": 10700, "task_loss": 0.2355862855911255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.037131913006305695, "epoch": 10.16, "learning_rate": 2.8826120074566414e-05, "loss": 0.0397, "step": 10701, "task_loss": 0.06265319883823395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02705603837966919, "epoch": 10.16, "learning_rate": 2.8815590008404293e-05, "loss": 0.0279, "step": 10702, "task_loss": 0.03594599664211273 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02101530134677887, "epoch": 10.16, "learning_rate": 2.8805059249121874e-05, "loss": 0.0222, "step": 10703, "task_loss": 0.03242805600166321 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01382765918970108, "epoch": 10.17, "learning_rate": 2.8794527798632117e-05, "loss": 0.023, "step": 10704, "task_loss": 0.10595635324716568 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018166912719607353, "epoch": 10.17, "learning_rate": 2.8783995658848105e-05, "loss": 0.023, "step": 10705, "task_loss": 0.06630286574363708 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.042599309235811234, "epoch": 10.17, "learning_rate": 2.877346283168306e-05, "loss": 0.0468, "step": 10706, "task_loss": 0.08451828360557556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039667222648859024, "epoch": 10.17, "learning_rate": 2.876292931905032e-05, "loss": 0.0363, "step": 10707, "task_loss": 0.0058592017740011215 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.046238042414188385, "epoch": 10.17, "learning_rate": 2.875239512286335e-05, "loss": 0.0441, "step": 10708, "task_loss": 0.024881619960069656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010262496769428253, "epoch": 10.17, "learning_rate": 2.8741860245035722e-05, "loss": 0.0095, "step": 10709, "task_loss": 0.0028820428997278214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029114559292793274, "epoch": 10.17, "learning_rate": 2.8731324687481176e-05, "loss": 0.0331, "step": 10710, "task_loss": 0.06866513192653656 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.047519125044345856, "epoch": 10.17, "learning_rate": 2.8720788452113517e-05, "loss": 0.0558, "step": 10711, "task_loss": 0.13019928336143494 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03364911675453186, "epoch": 10.17, "learning_rate": 2.8710251540846723e-05, "loss": 0.0376, "step": 10712, "task_loss": 0.07327281683683395 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017668629065155983, "epoch": 10.17, "learning_rate": 2.8699713955594864e-05, "loss": 0.019, "step": 10713, "task_loss": 0.03053228370845318 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04549794644117355, "epoch": 10.17, "learning_rate": 2.8689175698272147e-05, "loss": 0.0501, "step": 10714, "task_loss": 0.09190307557582855 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01674514450132847, "epoch": 10.18, "learning_rate": 2.8678636770792906e-05, "loss": 0.0203, "step": 10715, "task_loss": 0.05194873362779617 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018080132082104683, "epoch": 10.18, "learning_rate": 2.8668097175071572e-05, "loss": 0.0263, "step": 10716, "task_loss": 0.10066099464893341 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03006262332201004, "epoch": 10.18, "learning_rate": 2.865755691302272e-05, "loss": 0.0329, "step": 10717, "task_loss": 0.05870826542377472 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015684347599744797, "epoch": 10.18, "learning_rate": 2.864701598656104e-05, "loss": 0.0182, "step": 10718, "task_loss": 0.040366411209106445 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010996408760547638, "epoch": 10.18, "learning_rate": 2.8636474397601343e-05, "loss": 0.0106, "step": 10719, "task_loss": 0.00703669898211956 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03208538889884949, "epoch": 10.18, "learning_rate": 2.862593214805856e-05, "loss": 0.0318, "step": 10720, "task_loss": 0.029489625245332718 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04310718923807144, "epoch": 10.18, "learning_rate": 2.8615389239847734e-05, "loss": 0.0414, "step": 10721, "task_loss": 0.025636808946728706 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.061272382736206055, "epoch": 10.18, "learning_rate": 2.8604845674884045e-05, "loss": 0.0585, "step": 10722, "task_loss": 0.03352653980255127 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01941574551165104, "epoch": 10.18, "learning_rate": 2.8594301455082777e-05, "loss": 0.0198, "step": 10723, "task_loss": 0.023156698793172836 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013191182166337967, "epoch": 10.18, "learning_rate": 2.8583756582359338e-05, "loss": 0.0123, "step": 10724, "task_loss": 0.004337495192885399 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01237378641963005, "epoch": 10.19, "learning_rate": 2.8573211058629262e-05, "loss": 0.0121, "step": 10725, "task_loss": 0.009604766964912415 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020672522485256195, "epoch": 10.19, "learning_rate": 2.8562664885808176e-05, "loss": 0.0237, "step": 10726, "task_loss": 0.050802476704120636 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018164366483688354, "epoch": 10.19, "learning_rate": 2.8552118065811868e-05, "loss": 0.0254, "step": 10727, "task_loss": 0.09089714288711548 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012251758947968483, "epoch": 10.19, "learning_rate": 2.85415706005562e-05, "loss": 0.0149, "step": 10728, "task_loss": 0.03885126858949661 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021103456616401672, "epoch": 10.19, "learning_rate": 2.8531022491957178e-05, "loss": 0.0238, "step": 10729, "task_loss": 0.048550285398960114 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021369164809584618, "epoch": 10.19, "learning_rate": 2.852047374193092e-05, "loss": 0.0323, "step": 10730, "task_loss": 0.1303352415561676 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013989603146910667, "epoch": 10.19, "learning_rate": 2.850992435239364e-05, "loss": 0.0133, "step": 10731, "task_loss": 0.006928419694304466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03571196272969246, "epoch": 10.19, "learning_rate": 2.8499374325261708e-05, "loss": 0.0377, "step": 10732, "task_loss": 0.05529388412833214 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01848205178976059, "epoch": 10.19, "learning_rate": 2.848882366245157e-05, "loss": 0.023, "step": 10733, "task_loss": 0.06377825886011124 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014538046903908253, "epoch": 10.19, "learning_rate": 2.847827236587982e-05, "loss": 0.0203, "step": 10734, "task_loss": 0.07199336588382721 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026060445234179497, "epoch": 10.19, "learning_rate": 2.846772043746313e-05, "loss": 0.0239, "step": 10735, "task_loss": 0.004935260862112045 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02200356312096119, "epoch": 10.2, "learning_rate": 2.845716787911833e-05, "loss": 0.0247, "step": 10736, "task_loss": 0.04902653768658638 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.10178372263908386, "epoch": 10.2, "learning_rate": 2.8446614692762336e-05, "loss": 0.0991, "step": 10737, "task_loss": 0.07502306997776031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03487898409366608, "epoch": 10.2, "learning_rate": 2.843606088031218e-05, "loss": 0.0367, "step": 10738, "task_loss": 0.05333896726369858 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014870396815240383, "epoch": 10.2, "learning_rate": 2.842550644368502e-05, "loss": 0.0235, "step": 10739, "task_loss": 0.1009284108877182 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017139773815870285, "epoch": 10.2, "learning_rate": 2.841495138479811e-05, "loss": 0.0162, "step": 10740, "task_loss": 0.008153453469276428 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02908232994377613, "epoch": 10.2, "learning_rate": 2.8404395705568848e-05, "loss": 0.0271, "step": 10741, "task_loss": 0.008917009457945824 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04850727319717407, "epoch": 10.2, "learning_rate": 2.8393839407914702e-05, "loss": 0.0502, "step": 10742, "task_loss": 0.06578432023525238 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017160603776574135, "epoch": 10.2, "learning_rate": 2.8383282493753283e-05, "loss": 0.0163, "step": 10743, "task_loss": 0.008876651525497437 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03274647891521454, "epoch": 10.2, "learning_rate": 2.83727249650023e-05, "loss": 0.0303, "step": 10744, "task_loss": 0.008528593927621841 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023494988679885864, "epoch": 10.2, "learning_rate": 2.836216682357959e-05, "loss": 0.0296, "step": 10745, "task_loss": 0.08411206305027008 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.045075200498104095, "epoch": 10.21, "learning_rate": 2.8351608071403085e-05, "loss": 0.0411, "step": 10746, "task_loss": 0.005580326542258263 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07080404460430145, "epoch": 10.21, "learning_rate": 2.8341048710390832e-05, "loss": 0.0653, "step": 10747, "task_loss": 0.016258470714092255 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02719147875905037, "epoch": 10.21, "learning_rate": 2.8330488742460987e-05, "loss": 0.0254, "step": 10748, "task_loss": 0.009210776537656784 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03383823484182358, "epoch": 10.21, "learning_rate": 2.8319928169531825e-05, "loss": 0.0556, "step": 10749, "task_loss": 0.2511603534221649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014685697853565216, "epoch": 10.21, "learning_rate": 2.830936699352172e-05, "loss": 0.0209, "step": 10750, "task_loss": 0.07651330530643463 }, { "epoch": 10.21, "eval_accuracy": 0.9025229357798165, "eval_loss": 0.4256412982940674, "eval_runtime": 18.2731, "eval_samples_per_second": 47.721, "eval_steps_per_second": 5.965, "step": 10750 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014020141214132309, "epoch": 10.21, "learning_rate": 2.8298805216349167e-05, "loss": 0.021, "step": 10751, "task_loss": 0.08416274189949036 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03295741230249405, "epoch": 10.21, "learning_rate": 2.8288242839932744e-05, "loss": 0.0485, "step": 10752, "task_loss": 0.18834072351455688 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013736523687839508, "epoch": 10.21, "learning_rate": 2.8277679866191194e-05, "loss": 0.0133, "step": 10753, "task_loss": 0.009392468258738518 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05337664112448692, "epoch": 10.21, "learning_rate": 2.8267116297043294e-05, "loss": 0.0524, "step": 10754, "task_loss": 0.0431857630610466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06362185627222061, "epoch": 10.21, "learning_rate": 2.8256552134407993e-05, "loss": 0.0686, "step": 10755, "task_loss": 0.11329066753387451 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.050114959478378296, "epoch": 10.21, "learning_rate": 2.8245987380204313e-05, "loss": 0.0566, "step": 10756, "task_loss": 0.11529053747653961 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03267570957541466, "epoch": 10.22, "learning_rate": 2.8235422036351382e-05, "loss": 0.0373, "step": 10757, "task_loss": 0.07926565408706665 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013439756818115711, "epoch": 10.22, "learning_rate": 2.822485610476847e-05, "loss": 0.013, "step": 10758, "task_loss": 0.008762186393141747 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02206001617014408, "epoch": 10.22, "learning_rate": 2.8214289587374908e-05, "loss": 0.0288, "step": 10759, "task_loss": 0.08915533125400543 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01932983100414276, "epoch": 10.22, "learning_rate": 2.8203722486090168e-05, "loss": 0.0201, "step": 10760, "task_loss": 0.026729634031653404 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012917459942400455, "epoch": 10.22, "learning_rate": 2.8193154802833803e-05, "loss": 0.0127, "step": 10761, "task_loss": 0.010323688387870789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022919047623872757, "epoch": 10.22, "learning_rate": 2.818258653952549e-05, "loss": 0.0217, "step": 10762, "task_loss": 0.01115039736032486 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01916324719786644, "epoch": 10.22, "learning_rate": 2.8172017698085013e-05, "loss": 0.018, "step": 10763, "task_loss": 0.00737486407160759 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01026608981192112, "epoch": 10.22, "learning_rate": 2.816144828043224e-05, "loss": 0.0163, "step": 10764, "task_loss": 0.07105138152837753 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018040932714939117, "epoch": 10.22, "learning_rate": 2.8150878288487155e-05, "loss": 0.0168, "step": 10765, "task_loss": 0.0060977693647146225 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012006964534521103, "epoch": 10.22, "learning_rate": 2.8140307724169857e-05, "loss": 0.0113, "step": 10766, "task_loss": 0.004458732903003693 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.036806024610996246, "epoch": 10.23, "learning_rate": 2.812973658940054e-05, "loss": 0.0391, "step": 10767, "task_loss": 0.05974595993757248 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.039469681680202484, "epoch": 10.23, "learning_rate": 2.8119164886099504e-05, "loss": 0.048, "step": 10768, "task_loss": 0.12487009167671204 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011058198288083076, "epoch": 10.23, "learning_rate": 2.8108592616187133e-05, "loss": 0.0105, "step": 10769, "task_loss": 0.005702011287212372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01453112531453371, "epoch": 10.23, "learning_rate": 2.8098019781583944e-05, "loss": 0.0201, "step": 10770, "task_loss": 0.07058731466531754 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01812460459768772, "epoch": 10.23, "learning_rate": 2.8087446384210547e-05, "loss": 0.0173, "step": 10771, "task_loss": 0.01019604504108429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014030968770384789, "epoch": 10.23, "learning_rate": 2.8076872425987637e-05, "loss": 0.0138, "step": 10772, "task_loss": 0.011378861963748932 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02499603107571602, "epoch": 10.23, "learning_rate": 2.8066297908836043e-05, "loss": 0.0273, "step": 10773, "task_loss": 0.04773323982954025 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04917794466018677, "epoch": 10.23, "learning_rate": 2.8055722834676658e-05, "loss": 0.0502, "step": 10774, "task_loss": 0.05955827608704567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04151075705885887, "epoch": 10.23, "learning_rate": 2.804514720543051e-05, "loss": 0.0442, "step": 10775, "task_loss": 0.06856313347816467 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06127103045582771, "epoch": 10.23, "learning_rate": 2.80345710230187e-05, "loss": 0.0622, "step": 10776, "task_loss": 0.07090768218040466 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015070604160428047, "epoch": 10.23, "learning_rate": 2.802399428936246e-05, "loss": 0.0141, "step": 10777, "task_loss": 0.004883896559476852 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017283853143453598, "epoch": 10.24, "learning_rate": 2.8013417006383076e-05, "loss": 0.0266, "step": 10778, "task_loss": 0.11040695756673813 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018551714718341827, "epoch": 10.24, "learning_rate": 2.8002839176001987e-05, "loss": 0.0171, "step": 10779, "task_loss": 0.004415277391672134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02501179277896881, "epoch": 10.24, "learning_rate": 2.799226080014071e-05, "loss": 0.0422, "step": 10780, "task_loss": 0.19670268893241882 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012673512101173401, "epoch": 10.24, "learning_rate": 2.7981681880720838e-05, "loss": 0.0133, "step": 10781, "task_loss": 0.018554171547293663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014598320238292217, "epoch": 10.24, "learning_rate": 2.7971102419664103e-05, "loss": 0.0141, "step": 10782, "task_loss": 0.009689368307590485 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014966873452067375, "epoch": 10.24, "learning_rate": 2.7960522418892288e-05, "loss": 0.0262, "step": 10783, "task_loss": 0.12777450680732727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01800619065761566, "epoch": 10.24, "learning_rate": 2.794994188032733e-05, "loss": 0.0173, "step": 10784, "task_loss": 0.011067090556025505 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01135667972266674, "epoch": 10.24, "learning_rate": 2.7939360805891218e-05, "loss": 0.0107, "step": 10785, "task_loss": 0.004777856171131134 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025919828563928604, "epoch": 10.24, "learning_rate": 2.7928779197506056e-05, "loss": 0.0289, "step": 10786, "task_loss": 0.05572760850191116 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016921542584896088, "epoch": 10.24, "learning_rate": 2.7918197057094054e-05, "loss": 0.0157, "step": 10787, "task_loss": 0.004976712167263031 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025802042335271835, "epoch": 10.25, "learning_rate": 2.7907614386577497e-05, "loss": 0.0428, "step": 10788, "task_loss": 0.19616416096687317 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008495388552546501, "epoch": 10.25, "learning_rate": 2.789703118787879e-05, "loss": 0.0079, "step": 10789, "task_loss": 0.002873443067073822 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016510333865880966, "epoch": 10.25, "learning_rate": 2.7886447462920412e-05, "loss": 0.0287, "step": 10790, "task_loss": 0.13880857825279236 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03891441598534584, "epoch": 10.25, "learning_rate": 2.787586321362495e-05, "loss": 0.046, "step": 10791, "task_loss": 0.1098531037569046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012705841101706028, "epoch": 10.25, "learning_rate": 2.7865278441915082e-05, "loss": 0.0149, "step": 10792, "task_loss": 0.034181784838438034 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01141888927668333, "epoch": 10.25, "learning_rate": 2.785469314971359e-05, "loss": 0.0187, "step": 10793, "task_loss": 0.08427157998085022 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017081687226891518, "epoch": 10.25, "learning_rate": 2.7844107338943343e-05, "loss": 0.0201, "step": 10794, "task_loss": 0.0467962883412838 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009645327925682068, "epoch": 10.25, "learning_rate": 2.7833521011527293e-05, "loss": 0.0128, "step": 10795, "task_loss": 0.041607990860939026 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.032230883836746216, "epoch": 10.25, "learning_rate": 2.782293416938851e-05, "loss": 0.0446, "step": 10796, "task_loss": 0.15550780296325684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07105334848165512, "epoch": 10.25, "learning_rate": 2.7812346814450135e-05, "loss": 0.0718, "step": 10797, "task_loss": 0.07855713367462158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013354838825762272, "epoch": 10.25, "learning_rate": 2.7801758948635414e-05, "loss": 0.0164, "step": 10798, "task_loss": 0.04363374784588814 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009988810867071152, "epoch": 10.26, "learning_rate": 2.77911705738677e-05, "loss": 0.0175, "step": 10799, "task_loss": 0.08463583886623383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015538221225142479, "epoch": 10.26, "learning_rate": 2.7780581692070395e-05, "loss": 0.0146, "step": 10800, "task_loss": 0.006343167275190353 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01280184555798769, "epoch": 10.26, "learning_rate": 2.7769992305167043e-05, "loss": 0.0203, "step": 10801, "task_loss": 0.08731023967266083 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05490295588970184, "epoch": 10.26, "learning_rate": 2.775940241508124e-05, "loss": 0.0581, "step": 10802, "task_loss": 0.08693551272153854 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.006106989458203316, "epoch": 10.26, "learning_rate": 2.774881202373671e-05, "loss": 0.0136, "step": 10803, "task_loss": 0.0810597613453865 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05367905646562576, "epoch": 10.26, "learning_rate": 2.773822113305723e-05, "loss": 0.0661, "step": 10804, "task_loss": 0.17820340394973755 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011592602357268333, "epoch": 10.26, "learning_rate": 2.7727629744966695e-05, "loss": 0.0109, "step": 10805, "task_loss": 0.004181232303380966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016554832458496094, "epoch": 10.26, "learning_rate": 2.7717037861389082e-05, "loss": 0.016, "step": 10806, "task_loss": 0.010724140331149101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015016058459877968, "epoch": 10.26, "learning_rate": 2.7706445484248454e-05, "loss": 0.0143, "step": 10807, "task_loss": 0.007696835324168205 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00887630321085453, "epoch": 10.26, "learning_rate": 2.769585261546897e-05, "loss": 0.013, "step": 10808, "task_loss": 0.05019646883010864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02640136145055294, "epoch": 10.26, "learning_rate": 2.768525925697487e-05, "loss": 0.0284, "step": 10809, "task_loss": 0.04624557122588158 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.08202344924211502, "epoch": 10.27, "learning_rate": 2.76746654106905e-05, "loss": 0.0864, "step": 10810, "task_loss": 0.125411257147789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051587365567684174, "epoch": 10.27, "learning_rate": 2.7664071078540282e-05, "loss": 0.0557, "step": 10811, "task_loss": 0.0922786295413971 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01615780219435692, "epoch": 10.27, "learning_rate": 2.7653476262448713e-05, "loss": 0.0155, "step": 10812, "task_loss": 0.009321728721261024 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06020238250494003, "epoch": 10.27, "learning_rate": 2.76428809643404e-05, "loss": 0.0655, "step": 10813, "task_loss": 0.11301226913928986 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025619039312005043, "epoch": 10.27, "learning_rate": 2.763228518614004e-05, "loss": 0.0258, "step": 10814, "task_loss": 0.027688482776284218 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011568024754524231, "epoch": 10.27, "learning_rate": 2.7621688929772393e-05, "loss": 0.0181, "step": 10815, "task_loss": 0.07710108906030655 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.048748280853033066, "epoch": 10.27, "learning_rate": 2.761109219716233e-05, "loss": 0.0455, "step": 10816, "task_loss": 0.016586463898420334 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03522196784615517, "epoch": 10.27, "learning_rate": 2.760049499023479e-05, "loss": 0.0463, "step": 10817, "task_loss": 0.1458013653755188 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010583333671092987, "epoch": 10.27, "learning_rate": 2.7589897310914814e-05, "loss": 0.0098, "step": 10818, "task_loss": 0.002439044415950775 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0179889015853405, "epoch": 10.27, "learning_rate": 2.7579299161127513e-05, "loss": 0.0167, "step": 10819, "task_loss": 0.0054456982761621475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014011922292411327, "epoch": 10.28, "learning_rate": 2.756870054279811e-05, "loss": 0.0139, "step": 10820, "task_loss": 0.01266825757920742 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020268365740776062, "epoch": 10.28, "learning_rate": 2.755810145785187e-05, "loss": 0.0187, "step": 10821, "task_loss": 0.004288617521524429 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025478918105363846, "epoch": 10.28, "learning_rate": 2.754750190821418e-05, "loss": 0.025, "step": 10822, "task_loss": 0.020617567002773285 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021658936515450478, "epoch": 10.28, "learning_rate": 2.753690189581051e-05, "loss": 0.0199, "step": 10823, "task_loss": 0.004049813374876976 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06283696740865707, "epoch": 10.28, "learning_rate": 2.752630142256638e-05, "loss": 0.0597, "step": 10824, "task_loss": 0.03175070881843567 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011753297410905361, "epoch": 10.28, "learning_rate": 2.7515700490407443e-05, "loss": 0.0111, "step": 10825, "task_loss": 0.00544341653585434 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09492142498493195, "epoch": 10.28, "learning_rate": 2.7505099101259386e-05, "loss": 0.0944, "step": 10826, "task_loss": 0.08998773992061615 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007941950112581253, "epoch": 10.28, "learning_rate": 2.749449725704802e-05, "loss": 0.0077, "step": 10827, "task_loss": 0.005958493798971176 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018316073343157768, "epoch": 10.28, "learning_rate": 2.748389495969921e-05, "loss": 0.0302, "step": 10828, "task_loss": 0.1371515393257141 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022628696635365486, "epoch": 10.28, "learning_rate": 2.747329221113891e-05, "loss": 0.0268, "step": 10829, "task_loss": 0.06442567706108093 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012505222111940384, "epoch": 10.28, "learning_rate": 2.7462689013293176e-05, "loss": 0.0118, "step": 10830, "task_loss": 0.005520684644579887 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02343965508043766, "epoch": 10.29, "learning_rate": 2.745208536808812e-05, "loss": 0.0239, "step": 10831, "task_loss": 0.027552150189876556 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05450985208153725, "epoch": 10.29, "learning_rate": 2.7441481277449954e-05, "loss": 0.06, "step": 10832, "task_loss": 0.10902714729309082 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023538943380117416, "epoch": 10.29, "learning_rate": 2.743087674330495e-05, "loss": 0.0287, "step": 10833, "task_loss": 0.07561871409416199 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01717299409210682, "epoch": 10.29, "learning_rate": 2.742027176757948e-05, "loss": 0.0172, "step": 10834, "task_loss": 0.01727975904941559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02774146758019924, "epoch": 10.29, "learning_rate": 2.7409666352199986e-05, "loss": 0.0257, "step": 10835, "task_loss": 0.007454710081219673 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020189553499221802, "epoch": 10.29, "learning_rate": 2.7399060499092992e-05, "loss": 0.0191, "step": 10836, "task_loss": 0.009731443598866463 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06831050664186478, "epoch": 10.29, "learning_rate": 2.7388454210185115e-05, "loss": 0.067, "step": 10837, "task_loss": 0.055503882467746735 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013829356990754604, "epoch": 10.29, "learning_rate": 2.7377847487403018e-05, "loss": 0.0128, "step": 10838, "task_loss": 0.003266597166657448 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014532563276588917, "epoch": 10.29, "learning_rate": 2.736724033267347e-05, "loss": 0.0153, "step": 10839, "task_loss": 0.021880408748984337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05628987401723862, "epoch": 10.29, "learning_rate": 2.7356632747923322e-05, "loss": 0.0626, "step": 10840, "task_loss": 0.11941660940647125 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.051775701344013214, "epoch": 10.3, "learning_rate": 2.7346024735079486e-05, "loss": 0.0678, "step": 10841, "task_loss": 0.21175605058670044 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027265068143606186, "epoch": 10.3, "learning_rate": 2.7335416296068962e-05, "loss": 0.0256, "step": 10842, "task_loss": 0.010892918333411217 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028751587495207787, "epoch": 10.3, "learning_rate": 2.7324807432818805e-05, "loss": 0.0302, "step": 10843, "task_loss": 0.043142762035131454 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01213914155960083, "epoch": 10.3, "learning_rate": 2.731419814725619e-05, "loss": 0.0111, "step": 10844, "task_loss": 0.002093670889735222 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021539079025387764, "epoch": 10.3, "learning_rate": 2.730358844130834e-05, "loss": 0.0212, "step": 10845, "task_loss": 0.018369406461715698 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03298931196331978, "epoch": 10.3, "learning_rate": 2.729297831690255e-05, "loss": 0.0398, "step": 10846, "task_loss": 0.10142332315444946 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016631023958325386, "epoch": 10.3, "learning_rate": 2.728236777596621e-05, "loss": 0.017, "step": 10847, "task_loss": 0.020175570622086525 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011871835216879845, "epoch": 10.3, "learning_rate": 2.7271756820426763e-05, "loss": 0.0125, "step": 10848, "task_loss": 0.017757927998900414 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0549296960234642, "epoch": 10.3, "learning_rate": 2.7261145452211763e-05, "loss": 0.0602, "step": 10849, "task_loss": 0.10732554644346237 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020789777860045433, "epoch": 10.3, "learning_rate": 2.725053367324879e-05, "loss": 0.0202, "step": 10850, "task_loss": 0.014408687129616737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011434385553002357, "epoch": 10.3, "learning_rate": 2.723992148546554e-05, "loss": 0.0134, "step": 10851, "task_loss": 0.031306661665439606 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014954166486859322, "epoch": 10.31, "learning_rate": 2.7229308890789767e-05, "loss": 0.0181, "step": 10852, "task_loss": 0.046106450259685516 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010979441925883293, "epoch": 10.31, "learning_rate": 2.7218695891149293e-05, "loss": 0.0102, "step": 10853, "task_loss": 0.003026876598596573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015950333327054977, "epoch": 10.31, "learning_rate": 2.720808248847203e-05, "loss": 0.0345, "step": 10854, "task_loss": 0.2015371024608612 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02772637829184532, "epoch": 10.31, "learning_rate": 2.719746868468595e-05, "loss": 0.0369, "step": 10855, "task_loss": 0.11981463432312012 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031578004360198975, "epoch": 10.31, "learning_rate": 2.7186854481719092e-05, "loss": 0.0397, "step": 10856, "task_loss": 0.11244003474712372 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007710220292210579, "epoch": 10.31, "learning_rate": 2.7176239881499595e-05, "loss": 0.0072, "step": 10857, "task_loss": 0.002641640603542328 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009416783228516579, "epoch": 10.31, "learning_rate": 2.716562488595563e-05, "loss": 0.0152, "step": 10858, "task_loss": 0.06755001842975616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06231432408094406, "epoch": 10.31, "learning_rate": 2.715500949701549e-05, "loss": 0.0701, "step": 10859, "task_loss": 0.14002208411693573 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027409039437770844, "epoch": 10.31, "learning_rate": 2.7144393716607486e-05, "loss": 0.0394, "step": 10860, "task_loss": 0.147533118724823 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04957464337348938, "epoch": 10.31, "learning_rate": 2.713377754666004e-05, "loss": 0.0626, "step": 10861, "task_loss": 0.17979982495307922 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05504240095615387, "epoch": 10.32, "learning_rate": 2.712316098910162e-05, "loss": 0.0618, "step": 10862, "task_loss": 0.12275400012731552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015080356039106846, "epoch": 10.32, "learning_rate": 2.711254404586079e-05, "loss": 0.0287, "step": 10863, "task_loss": 0.1511533409357071 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008478467352688313, "epoch": 10.32, "learning_rate": 2.7101926718866156e-05, "loss": 0.014, "step": 10864, "task_loss": 0.06384154409170151 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.1419282853603363, "epoch": 10.32, "learning_rate": 2.7091309010046408e-05, "loss": 0.1438, "step": 10865, "task_loss": 0.1609189212322235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018446022644639015, "epoch": 10.32, "learning_rate": 2.708069092133031e-05, "loss": 0.0171, "step": 10866, "task_loss": 0.005260376259684563 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0161033496260643, "epoch": 10.32, "learning_rate": 2.7070072454646683e-05, "loss": 0.0208, "step": 10867, "task_loss": 0.06297904253005981 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06171935796737671, "epoch": 10.32, "learning_rate": 2.7059453611924433e-05, "loss": 0.0676, "step": 10868, "task_loss": 0.12061215192079544 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.028885137289762497, "epoch": 10.32, "learning_rate": 2.7048834395092505e-05, "loss": 0.0334, "step": 10869, "task_loss": 0.07451960444450378 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011872227303683758, "epoch": 10.32, "learning_rate": 2.7038214806079948e-05, "loss": 0.0111, "step": 10870, "task_loss": 0.0037200450897216797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03222740814089775, "epoch": 10.32, "learning_rate": 2.702759484681585e-05, "loss": 0.0405, "step": 10871, "task_loss": 0.11447662115097046 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013929888606071472, "epoch": 10.32, "learning_rate": 2.701697451922939e-05, "loss": 0.0206, "step": 10872, "task_loss": 0.08043880015611649 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013857328332960606, "epoch": 10.33, "learning_rate": 2.7006353825249792e-05, "loss": 0.015, "step": 10873, "task_loss": 0.025718068704009056 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020384974777698517, "epoch": 10.33, "learning_rate": 2.6995732766806354e-05, "loss": 0.0243, "step": 10874, "task_loss": 0.059511590749025345 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010584133677184582, "epoch": 10.33, "learning_rate": 2.6985111345828452e-05, "loss": 0.0111, "step": 10875, "task_loss": 0.01563730277121067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03728438913822174, "epoch": 10.33, "learning_rate": 2.6974489564245513e-05, "loss": 0.038, "step": 10876, "task_loss": 0.044412482529878616 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018023356795310974, "epoch": 10.33, "learning_rate": 2.6963867423987032e-05, "loss": 0.0171, "step": 10877, "task_loss": 0.008735572919249535 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024205505847930908, "epoch": 10.33, "learning_rate": 2.695324492698258e-05, "loss": 0.0222, "step": 10878, "task_loss": 0.004576884210109711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04197827726602554, "epoch": 10.33, "learning_rate": 2.694262207516178e-05, "loss": 0.0406, "step": 10879, "task_loss": 0.028400206938385963 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015484409406781197, "epoch": 10.33, "learning_rate": 2.6931998870454327e-05, "loss": 0.0145, "step": 10880, "task_loss": 0.005911100655794144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014505671337246895, "epoch": 10.33, "learning_rate": 2.692137531478997e-05, "loss": 0.0135, "step": 10881, "task_loss": 0.003982797265052795 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024028435349464417, "epoch": 10.33, "learning_rate": 2.6910751410098532e-05, "loss": 0.0294, "step": 10882, "task_loss": 0.07786644250154495 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05532676726579666, "epoch": 10.34, "learning_rate": 2.6900127158309903e-05, "loss": 0.0686, "step": 10883, "task_loss": 0.18820956349372864 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.00881568156182766, "epoch": 10.34, "learning_rate": 2.688950256135402e-05, "loss": 0.0155, "step": 10884, "task_loss": 0.07542967796325684 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007144515868276358, "epoch": 10.34, "learning_rate": 2.6878877621160904e-05, "loss": 0.0068, "step": 10885, "task_loss": 0.0034852921962738037 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03914850950241089, "epoch": 10.34, "learning_rate": 2.686825233966061e-05, "loss": 0.036, "step": 10886, "task_loss": 0.007247054949402809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03601888567209244, "epoch": 10.34, "learning_rate": 2.6857626718783285e-05, "loss": 0.0516, "step": 10887, "task_loss": 0.19196146726608276 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0399981364607811, "epoch": 10.34, "learning_rate": 2.6847000760459118e-05, "loss": 0.0487, "step": 10888, "task_loss": 0.12683525681495667 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007798383943736553, "epoch": 10.34, "learning_rate": 2.683637446661837e-05, "loss": 0.0138, "step": 10889, "task_loss": 0.06797172874212265 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04516351968050003, "epoch": 10.34, "learning_rate": 2.6825747839191362e-05, "loss": 0.0543, "step": 10890, "task_loss": 0.13696971535682678 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05506161227822304, "epoch": 10.34, "learning_rate": 2.681512088010845e-05, "loss": 0.0671, "step": 10891, "task_loss": 0.1754119098186493 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034937888383865356, "epoch": 10.34, "learning_rate": 2.6804493591300105e-05, "loss": 0.0372, "step": 10892, "task_loss": 0.05717271566390991 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01608917862176895, "epoch": 10.34, "learning_rate": 2.6793865974696803e-05, "loss": 0.0375, "step": 10893, "task_loss": 0.23010924458503723 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0070289126597344875, "epoch": 10.35, "learning_rate": 2.67832380322291e-05, "loss": 0.0152, "step": 10894, "task_loss": 0.0884644091129303 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02088269032537937, "epoch": 10.35, "learning_rate": 2.6772609765827627e-05, "loss": 0.0205, "step": 10895, "task_loss": 0.016729770228266716 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021521752700209618, "epoch": 10.35, "learning_rate": 2.6761981177423052e-05, "loss": 0.0341, "step": 10896, "task_loss": 0.14689095318317413 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.030128255486488342, "epoch": 10.35, "learning_rate": 2.6751352268946118e-05, "loss": 0.0355, "step": 10897, "task_loss": 0.08407264947891235 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01246646698564291, "epoch": 10.35, "learning_rate": 2.6740723042327598e-05, "loss": 0.0171, "step": 10898, "task_loss": 0.0584290474653244 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01876392588019371, "epoch": 10.35, "learning_rate": 2.673009349949836e-05, "loss": 0.0292, "step": 10899, "task_loss": 0.12266287952661514 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03964090347290039, "epoch": 10.35, "learning_rate": 2.6719463642389302e-05, "loss": 0.0393, "step": 10900, "task_loss": 0.036599088460206985 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020488981157541275, "epoch": 10.35, "learning_rate": 2.6708833472931394e-05, "loss": 0.0342, "step": 10901, "task_loss": 0.15787988901138306 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07471579313278198, "epoch": 10.35, "learning_rate": 2.669820299305566e-05, "loss": 0.0884, "step": 10902, "task_loss": 0.21201379597187042 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.027081774547696114, "epoch": 10.35, "learning_rate": 2.6687572204693174e-05, "loss": 0.0261, "step": 10903, "task_loss": 0.017036577686667442 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026169799268245697, "epoch": 10.36, "learning_rate": 2.667694110977506e-05, "loss": 0.0241, "step": 10904, "task_loss": 0.005672993138432503 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020091958343982697, "epoch": 10.36, "learning_rate": 2.6666309710232522e-05, "loss": 0.0294, "step": 10905, "task_loss": 0.11267304420471191 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.006835642736405134, "epoch": 10.36, "learning_rate": 2.6655678007996804e-05, "loss": 0.0068, "step": 10906, "task_loss": 0.006012851372361183 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05445358157157898, "epoch": 10.36, "learning_rate": 2.66450460049992e-05, "loss": 0.0629, "step": 10907, "task_loss": 0.13931649923324585 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024199290201067924, "epoch": 10.36, "learning_rate": 2.6634413703171058e-05, "loss": 0.0294, "step": 10908, "task_loss": 0.07624688744544983 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014362799003720284, "epoch": 10.36, "learning_rate": 2.6623781104443806e-05, "loss": 0.0138, "step": 10909, "task_loss": 0.009138602763414383 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.029530849307775497, "epoch": 10.36, "learning_rate": 2.6613148210748894e-05, "loss": 0.0279, "step": 10910, "task_loss": 0.01313771866261959 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04332811385393143, "epoch": 10.36, "learning_rate": 2.6602515024017842e-05, "loss": 0.045, "step": 10911, "task_loss": 0.06038268655538559 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0265837199985981, "epoch": 10.36, "learning_rate": 2.6591881546182216e-05, "loss": 0.0263, "step": 10912, "task_loss": 0.024174809455871582 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023015733808279037, "epoch": 10.36, "learning_rate": 2.6581247779173635e-05, "loss": 0.0233, "step": 10913, "task_loss": 0.026104921475052834 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02378995530307293, "epoch": 10.36, "learning_rate": 2.6570613724923788e-05, "loss": 0.0292, "step": 10914, "task_loss": 0.07820422202348709 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011787974275648594, "epoch": 10.37, "learning_rate": 2.655997938536439e-05, "loss": 0.0193, "step": 10915, "task_loss": 0.08679046481847763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023985158652067184, "epoch": 10.37, "learning_rate": 2.654934476242723e-05, "loss": 0.0221, "step": 10916, "task_loss": 0.005446845665574074 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.025475893169641495, "epoch": 10.37, "learning_rate": 2.653870985804412e-05, "loss": 0.024, "step": 10917, "task_loss": 0.010533835738897324 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04181814193725586, "epoch": 10.37, "learning_rate": 2.6528074674146963e-05, "loss": 0.052, "step": 10918, "task_loss": 0.14384526014328003 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020111829042434692, "epoch": 10.37, "learning_rate": 2.6517439212667677e-05, "loss": 0.0193, "step": 10919, "task_loss": 0.01181393675506115 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016157563775777817, "epoch": 10.37, "learning_rate": 2.6506803475538256e-05, "loss": 0.0182, "step": 10920, "task_loss": 0.03620835393667221 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026654712855815887, "epoch": 10.37, "learning_rate": 2.649616746469072e-05, "loss": 0.0281, "step": 10921, "task_loss": 0.04074002057313919 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014352642931044102, "epoch": 10.37, "learning_rate": 2.648553118205716e-05, "loss": 0.0138, "step": 10922, "task_loss": 0.009018674492835999 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02536664716899395, "epoch": 10.37, "learning_rate": 2.6474894629569713e-05, "loss": 0.0348, "step": 10923, "task_loss": 0.11997328698635101 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016859643161296844, "epoch": 10.37, "learning_rate": 2.6464257809160548e-05, "loss": 0.023, "step": 10924, "task_loss": 0.07873199135065079 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01845327578485012, "epoch": 10.38, "learning_rate": 2.6453620722761896e-05, "loss": 0.022, "step": 10925, "task_loss": 0.05434544384479523 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008304309099912643, "epoch": 10.38, "learning_rate": 2.6442983372306045e-05, "loss": 0.0079, "step": 10926, "task_loss": 0.004198441281914711 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021259469911456108, "epoch": 10.38, "learning_rate": 2.643234575972531e-05, "loss": 0.0265, "step": 10927, "task_loss": 0.07323883473873138 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008786037564277649, "epoch": 10.38, "learning_rate": 2.642170788695208e-05, "loss": 0.0108, "step": 10928, "task_loss": 0.02885708585381508 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04073190689086914, "epoch": 10.38, "learning_rate": 2.6411069755918755e-05, "loss": 0.044, "step": 10929, "task_loss": 0.07315443456172943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02872268483042717, "epoch": 10.38, "learning_rate": 2.6400431368557815e-05, "loss": 0.035, "step": 10930, "task_loss": 0.09104539453983307 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007937128655612469, "epoch": 10.38, "learning_rate": 2.6389792726801778e-05, "loss": 0.0074, "step": 10931, "task_loss": 0.0028884951025247574 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04766330122947693, "epoch": 10.38, "learning_rate": 2.6379153832583186e-05, "loss": 0.0664, "step": 10932, "task_loss": 0.23463010787963867 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03247709199786186, "epoch": 10.38, "learning_rate": 2.6368514687834672e-05, "loss": 0.0347, "step": 10933, "task_loss": 0.05483207106590271 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01322152093052864, "epoch": 10.38, "learning_rate": 2.6357875294488865e-05, "loss": 0.0234, "step": 10934, "task_loss": 0.11500921845436096 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009567633271217346, "epoch": 10.38, "learning_rate": 2.6347235654478482e-05, "loss": 0.0093, "step": 10935, "task_loss": 0.006448997184634209 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014935207553207874, "epoch": 10.39, "learning_rate": 2.6336595769736245e-05, "loss": 0.0213, "step": 10936, "task_loss": 0.07824273407459259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011560074985027313, "epoch": 10.39, "learning_rate": 2.6325955642194948e-05, "loss": 0.011, "step": 10937, "task_loss": 0.005575112998485565 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.031049327924847603, "epoch": 10.39, "learning_rate": 2.6315315273787428e-05, "loss": 0.0339, "step": 10938, "task_loss": 0.059728413820266724 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.06698919832706451, "epoch": 10.39, "learning_rate": 2.630467466644655e-05, "loss": 0.0754, "step": 10939, "task_loss": 0.15107224881649017 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014802630990743637, "epoch": 10.39, "learning_rate": 2.629403382210524e-05, "loss": 0.0185, "step": 10940, "task_loss": 0.051325224339962006 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.008546770550310612, "epoch": 10.39, "learning_rate": 2.628339274269645e-05, "loss": 0.0206, "step": 10941, "task_loss": 0.12929676473140717 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017933053895831108, "epoch": 10.39, "learning_rate": 2.6272751430153186e-05, "loss": 0.0208, "step": 10942, "task_loss": 0.04631277918815613 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03135647252202034, "epoch": 10.39, "learning_rate": 2.62621098864085e-05, "loss": 0.0291, "step": 10943, "task_loss": 0.008471904322504997 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009145300835371017, "epoch": 10.39, "learning_rate": 2.6251468113395465e-05, "loss": 0.0087, "step": 10944, "task_loss": 0.005033126100897789 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009342268109321594, "epoch": 10.39, "learning_rate": 2.6240826113047235e-05, "loss": 0.0093, "step": 10945, "task_loss": 0.008507819846272469 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0181414894759655, "epoch": 10.4, "learning_rate": 2.6230183887296955e-05, "loss": 0.0168, "step": 10946, "task_loss": 0.00474889762699604 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03423862159252167, "epoch": 10.4, "learning_rate": 2.6219541438077855e-05, "loss": 0.0467, "step": 10947, "task_loss": 0.15890000760555267 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.024610301479697227, "epoch": 10.4, "learning_rate": 2.620889876732317e-05, "loss": 0.0291, "step": 10948, "task_loss": 0.06941775232553482 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013289782218635082, "epoch": 10.4, "learning_rate": 2.6198255876966204e-05, "loss": 0.0125, "step": 10949, "task_loss": 0.005243049934506416 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026343289762735367, "epoch": 10.4, "learning_rate": 2.6187612768940293e-05, "loss": 0.0311, "step": 10950, "task_loss": 0.0735698863863945 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.015885502099990845, "epoch": 10.4, "learning_rate": 2.61769694451788e-05, "loss": 0.0194, "step": 10951, "task_loss": 0.051202740520238876 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.057844310998916626, "epoch": 10.4, "learning_rate": 2.616632590761514e-05, "loss": 0.0603, "step": 10952, "task_loss": 0.08287277817726135 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012808309867978096, "epoch": 10.4, "learning_rate": 2.615568215818276e-05, "loss": 0.0121, "step": 10953, "task_loss": 0.005606703460216522 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016954544931650162, "epoch": 10.4, "learning_rate": 2.6145038198815152e-05, "loss": 0.0158, "step": 10954, "task_loss": 0.005178598687052727 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0657939538359642, "epoch": 10.4, "learning_rate": 2.6134394031445843e-05, "loss": 0.0743, "step": 10955, "task_loss": 0.15127022564411163 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02318597212433815, "epoch": 10.4, "learning_rate": 2.6123749658008383e-05, "loss": 0.0273, "step": 10956, "task_loss": 0.06436937302350998 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.050585560500621796, "epoch": 10.41, "learning_rate": 2.6113105080436396e-05, "loss": 0.0512, "step": 10957, "task_loss": 0.05654662102460861 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.03527712821960449, "epoch": 10.41, "learning_rate": 2.6102460300663506e-05, "loss": 0.0369, "step": 10958, "task_loss": 0.0516989640891552 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011321203783154488, "epoch": 10.41, "learning_rate": 2.60918153206234e-05, "loss": 0.0106, "step": 10959, "task_loss": 0.00376252643764019 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014566227793693542, "epoch": 10.41, "learning_rate": 2.6081170142249773e-05, "loss": 0.0197, "step": 10960, "task_loss": 0.06615670025348663 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01640435680747032, "epoch": 10.41, "learning_rate": 2.607052476747639e-05, "loss": 0.0155, "step": 10961, "task_loss": 0.007749777287244797 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.0176126379519701, "epoch": 10.41, "learning_rate": 2.6059879198237026e-05, "loss": 0.0162, "step": 10962, "task_loss": 0.0036756154149770737 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.034288398921489716, "epoch": 10.41, "learning_rate": 2.6049233436465498e-05, "loss": 0.0426, "step": 10963, "task_loss": 0.1172315701842308 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.010387426242232323, "epoch": 10.41, "learning_rate": 2.6038587484095673e-05, "loss": 0.0171, "step": 10964, "task_loss": 0.07761921733617783 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016161680221557617, "epoch": 10.41, "learning_rate": 2.6027941343061412e-05, "loss": 0.024, "step": 10965, "task_loss": 0.0944659486413002 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.009701198898255825, "epoch": 10.41, "learning_rate": 2.6017295015296665e-05, "loss": 0.0091, "step": 10966, "task_loss": 0.0035239197313785553 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02417180687189102, "epoch": 10.42, "learning_rate": 2.600664850273538e-05, "loss": 0.0263, "step": 10967, "task_loss": 0.04507189989089966 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07505623996257782, "epoch": 10.42, "learning_rate": 2.599600180731155e-05, "loss": 0.0926, "step": 10968, "task_loss": 0.2502027750015259 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.059122804552316666, "epoch": 10.42, "learning_rate": 2.598535493095919e-05, "loss": 0.0613, "step": 10969, "task_loss": 0.08061361312866211 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01452677696943283, "epoch": 10.42, "learning_rate": 2.5974707875612357e-05, "loss": 0.021, "step": 10970, "task_loss": 0.07877064496278763 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.019884496927261353, "epoch": 10.42, "learning_rate": 2.5964060643205153e-05, "loss": 0.0246, "step": 10971, "task_loss": 0.06657460331916809 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013773739337921143, "epoch": 10.42, "learning_rate": 2.5953413235671688e-05, "loss": 0.0197, "step": 10972, "task_loss": 0.07330326735973358 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021291140466928482, "epoch": 10.42, "learning_rate": 2.594276565494611e-05, "loss": 0.0245, "step": 10973, "task_loss": 0.0534619465470314 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.038762666285037994, "epoch": 10.42, "learning_rate": 2.5932117902962616e-05, "loss": 0.0361, "step": 10974, "task_loss": 0.012404365465044975 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.007273940369486809, "epoch": 10.42, "learning_rate": 2.5921469981655415e-05, "loss": 0.007, "step": 10975, "task_loss": 0.004414750263094902 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.09941526502370834, "epoch": 10.42, "learning_rate": 2.591082189295876e-05, "loss": 0.0983, "step": 10976, "task_loss": 0.08790935575962067 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013018698431551456, "epoch": 10.42, "learning_rate": 2.590017363880691e-05, "loss": 0.0121, "step": 10977, "task_loss": 0.004249611869454384 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.014059076085686684, "epoch": 10.43, "learning_rate": 2.5889525221134192e-05, "loss": 0.0204, "step": 10978, "task_loss": 0.07782687991857529 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013228708878159523, "epoch": 10.43, "learning_rate": 2.5878876641874928e-05, "loss": 0.0125, "step": 10979, "task_loss": 0.005660973489284515 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.026601828634738922, "epoch": 10.43, "learning_rate": 2.5868227902963493e-05, "loss": 0.0369, "step": 10980, "task_loss": 0.12926051020622253 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05293282866477966, "epoch": 10.43, "learning_rate": 2.5857579006334282e-05, "loss": 0.0522, "step": 10981, "task_loss": 0.046048957854509354 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.07201385498046875, "epoch": 10.43, "learning_rate": 2.58469299539217e-05, "loss": 0.0862, "step": 10982, "task_loss": 0.21407830715179443 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.011810416355729103, "epoch": 10.43, "learning_rate": 2.5836280747660225e-05, "loss": 0.0173, "step": 10983, "task_loss": 0.0669318437576294 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.023423772305250168, "epoch": 10.43, "learning_rate": 2.5825631389484323e-05, "loss": 0.0259, "step": 10984, "task_loss": 0.04826189577579498 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.022933730855584145, "epoch": 10.43, "learning_rate": 2.58149818813285e-05, "loss": 0.033, "step": 10985, "task_loss": 0.12334097921848297 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.04635820910334587, "epoch": 10.43, "learning_rate": 2.5804332225127294e-05, "loss": 0.0514, "step": 10986, "task_loss": 0.09712212532758713 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.012980525381863117, "epoch": 10.43, "learning_rate": 2.579368242281527e-05, "loss": 0.02, "step": 10987, "task_loss": 0.08316182345151901 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017791984602808952, "epoch": 10.43, "learning_rate": 2.5783032476327007e-05, "loss": 0.0227, "step": 10988, "task_loss": 0.06660401821136475 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.017145544290542603, "epoch": 10.44, "learning_rate": 2.5772382387597128e-05, "loss": 0.0236, "step": 10989, "task_loss": 0.08195091038942337 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.018846768885850906, "epoch": 10.44, "learning_rate": 2.5761732158560263e-05, "loss": 0.0216, "step": 10990, "task_loss": 0.04650796204805374 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.021697044372558594, "epoch": 10.44, "learning_rate": 2.5751081791151083e-05, "loss": 0.0356, "step": 10991, "task_loss": 0.16030679643154144 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013004236854612827, "epoch": 10.44, "learning_rate": 2.574043128730428e-05, "loss": 0.0135, "step": 10992, "task_loss": 0.017738407477736473 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.02329951897263527, "epoch": 10.44, "learning_rate": 2.572978064895457e-05, "loss": 0.0298, "step": 10993, "task_loss": 0.08830928802490234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.020550910383462906, "epoch": 10.44, "learning_rate": 2.5719129878036686e-05, "loss": 0.0193, "step": 10994, "task_loss": 0.007566181942820549 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05504726618528366, "epoch": 10.44, "learning_rate": 2.5708478976485402e-05, "loss": 0.0543, "step": 10995, "task_loss": 0.04714515060186386 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05265543609857559, "epoch": 10.44, "learning_rate": 2.569782794623549e-05, "loss": 0.0528, "step": 10996, "task_loss": 0.05369632691144943 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.01432766579091549, "epoch": 10.44, "learning_rate": 2.5687176789221784e-05, "loss": 0.0136, "step": 10997, "task_loss": 0.0071462057530879974 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.016092218458652496, "epoch": 10.44, "learning_rate": 2.5676525507379097e-05, "loss": 0.0155, "step": 10998, "task_loss": 0.010227423161268234 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.013000791892409325, "epoch": 10.45, "learning_rate": 2.566587410264229e-05, "loss": 0.021, "step": 10999, "task_loss": 0.09294840693473816 }, { "compression/magnitude_sparsity/sparsity_level_for_model": 0.6249402080370299, "compression/magnitude_sparsity/sparsity_level_for_sparsified_layers": 0.8000000257231671, "compression/magnitude_sparsity/target_sparsity_level": 0.8, "compression_loss": 0.0, "distillation_loss": 0.05334077402949333, "epoch": 10.45, "learning_rate": 2.565522257694625e-05, "loss": 0.0488, "step": 11000, "task_loss": 0.0074908919632434845 }, { "epoch": 10.45, "eval_accuracy": 0.9128440366972477, "eval_loss": 0.42293068766593933, "eval_runtime": 17.8866, "eval_samples_per_second": 48.752, "eval_steps_per_second": 6.094, "step": 11000 } ], "max_steps": 29484, "num_train_epochs": 28, "total_flos": 4.62792613049088e+16, "trial_name": null, "trial_params": null }