yujiepan's picture
upload model
c6b8155
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.8008673653776652,
"global_step": 7750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"compression_loss": 0.0,
"distillation_loss": 13.863856315612793,
"epoch": 0.0,
"learning_rate": 1.5178894109143475e-07,
"loss": 14.3579,
"step": 10,
"task_loss": 5.79541015625
},
{
"compression_loss": 0.0,
"distillation_loss": 15.529892921447754,
"epoch": 0.01,
"learning_rate": 3.6863028550777017e-07,
"loss": 14.5251,
"step": 20,
"task_loss": 5.82769775390625
},
{
"compression_loss": 0.0,
"distillation_loss": 15.667880058288574,
"epoch": 0.01,
"learning_rate": 5.637874954824719e-07,
"loss": 14.2874,
"step": 30,
"task_loss": 5.782501220703125
},
{
"compression_loss": 0.0,
"distillation_loss": 16.32543182373047,
"epoch": 0.01,
"learning_rate": 7.806288398988074e-07,
"loss": 14.482,
"step": 40,
"task_loss": 5.93621826171875
},
{
"compression_loss": 0.0,
"distillation_loss": 15.878634452819824,
"epoch": 0.02,
"learning_rate": 9.974701843151429e-07,
"loss": 14.0581,
"step": 50,
"task_loss": 5.87786865234375
},
{
"compression_loss": 0.0,
"distillation_loss": 15.618017196655273,
"epoch": 0.02,
"learning_rate": 1.214311528731478e-06,
"loss": 14.4649,
"step": 60,
"task_loss": 5.95428466796875
},
{
"compression_loss": 0.0,
"distillation_loss": 14.689329147338867,
"epoch": 0.03,
"learning_rate": 1.4311528731478136e-06,
"loss": 14.1966,
"step": 70,
"task_loss": 5.9617919921875
},
{
"compression_loss": 0.0,
"distillation_loss": 15.500862121582031,
"epoch": 0.03,
"learning_rate": 1.647994217564149e-06,
"loss": 14.1659,
"step": 80,
"task_loss": 5.9317626953125
},
{
"compression_loss": 0.0,
"distillation_loss": 15.836941719055176,
"epoch": 0.03,
"learning_rate": 1.8648355619804843e-06,
"loss": 14.0619,
"step": 90,
"task_loss": 5.79229736328125
},
{
"compression_loss": 0.0,
"distillation_loss": 15.45178508758545,
"epoch": 0.04,
"learning_rate": 2.08167690639682e-06,
"loss": 13.9798,
"step": 100,
"task_loss": 5.859405517578125
},
{
"compression_loss": 0.0,
"distillation_loss": 14.564159393310547,
"epoch": 0.04,
"learning_rate": 2.298518250813155e-06,
"loss": 14.0794,
"step": 110,
"task_loss": 5.789794921875
},
{
"compression_loss": 0.0,
"distillation_loss": 13.552111625671387,
"epoch": 0.04,
"learning_rate": 2.5153595952294906e-06,
"loss": 13.8458,
"step": 120,
"task_loss": 5.688568115234375
},
{
"compression_loss": 0.0,
"distillation_loss": 14.749038696289062,
"epoch": 0.05,
"learning_rate": 2.732200939645826e-06,
"loss": 13.8899,
"step": 130,
"task_loss": 5.49835205078125
},
{
"compression_loss": 0.0,
"distillation_loss": 14.9246826171875,
"epoch": 0.05,
"learning_rate": 2.9490422840621613e-06,
"loss": 13.7831,
"step": 140,
"task_loss": 5.456787109375
},
{
"compression_loss": 0.0,
"distillation_loss": 14.531848907470703,
"epoch": 0.05,
"learning_rate": 3.1658836284784965e-06,
"loss": 13.4898,
"step": 150,
"task_loss": 5.613800048828125
},
{
"compression_loss": 0.0,
"distillation_loss": 15.012535095214844,
"epoch": 0.06,
"learning_rate": 3.382724972894832e-06,
"loss": 13.6949,
"step": 160,
"task_loss": 5.2825927734375
},
{
"compression_loss": 0.0,
"distillation_loss": 14.039583206176758,
"epoch": 0.06,
"learning_rate": 3.5995663173111676e-06,
"loss": 13.182,
"step": 170,
"task_loss": 5.353515625
},
{
"compression_loss": 0.0,
"distillation_loss": 13.0057373046875,
"epoch": 0.07,
"learning_rate": 3.816407661727503e-06,
"loss": 12.6947,
"step": 180,
"task_loss": 5.157745361328125
},
{
"compression_loss": 0.0,
"distillation_loss": 14.364778518676758,
"epoch": 0.07,
"learning_rate": 4.033249006143838e-06,
"loss": 12.8377,
"step": 190,
"task_loss": 5.151580810546875
},
{
"compression_loss": 0.0,
"distillation_loss": 12.603987693786621,
"epoch": 0.07,
"learning_rate": 4.2500903505601735e-06,
"loss": 12.5114,
"step": 200,
"task_loss": 5.133880615234375
},
{
"compression_loss": 0.0,
"distillation_loss": 13.770363807678223,
"epoch": 0.08,
"learning_rate": 4.466931694976509e-06,
"loss": 12.571,
"step": 210,
"task_loss": 4.792236328125
},
{
"compression_loss": 0.0,
"distillation_loss": 12.380254745483398,
"epoch": 0.08,
"learning_rate": 4.683773039392844e-06,
"loss": 12.1934,
"step": 220,
"task_loss": 4.9013671875
},
{
"compression_loss": 0.0,
"distillation_loss": 12.504887580871582,
"epoch": 0.08,
"learning_rate": 4.900614383809179e-06,
"loss": 12.2029,
"step": 230,
"task_loss": 4.988494873046875
},
{
"compression_loss": 0.0,
"distillation_loss": 12.855850219726562,
"epoch": 0.09,
"learning_rate": 5.117455728225516e-06,
"loss": 11.5489,
"step": 240,
"task_loss": 4.76690673828125
},
{
"compression_loss": 0.0,
"distillation_loss": 11.811483383178711,
"epoch": 0.09,
"learning_rate": 5.3342970726418505e-06,
"loss": 11.297,
"step": 250,
"task_loss": 4.54803466796875
},
{
"epoch": 0.09,
"eval_exact_match": 2.7341532639545885,
"eval_f1": 12.629112049670779,
"step": 250
},
{
"compression_loss": 0.0,
"distillation_loss": 11.374210357666016,
"epoch": 0.09,
"learning_rate": 5.551138417058186e-06,
"loss": 10.9207,
"step": 260,
"task_loss": 4.5309906005859375
},
{
"compression_loss": 0.0,
"distillation_loss": 10.759222030639648,
"epoch": 0.1,
"learning_rate": 5.767979761474522e-06,
"loss": 10.4359,
"step": 270,
"task_loss": 4.0685882568359375
},
{
"compression_loss": 0.0,
"distillation_loss": 10.636564254760742,
"epoch": 0.1,
"learning_rate": 5.984821105890856e-06,
"loss": 9.9935,
"step": 280,
"task_loss": 4.374786376953125
},
{
"compression_loss": 0.0,
"distillation_loss": 10.036893844604492,
"epoch": 0.1,
"learning_rate": 6.201662450307192e-06,
"loss": 9.4053,
"step": 290,
"task_loss": 3.8459930419921875
},
{
"compression_loss": 0.0,
"distillation_loss": 9.233875274658203,
"epoch": 0.11,
"learning_rate": 6.4185037947235275e-06,
"loss": 9.0732,
"step": 300,
"task_loss": 3.669677734375
},
{
"compression_loss": 0.0,
"distillation_loss": 8.439445495605469,
"epoch": 0.11,
"learning_rate": 6.635345139139862e-06,
"loss": 8.6626,
"step": 310,
"task_loss": 3.1441650390625
},
{
"compression_loss": 0.0,
"distillation_loss": 8.574735641479492,
"epoch": 0.12,
"learning_rate": 6.852186483556198e-06,
"loss": 8.297,
"step": 320,
"task_loss": 3.4480514526367188
},
{
"compression_loss": 0.0,
"distillation_loss": 8.304073333740234,
"epoch": 0.12,
"learning_rate": 7.069027827972534e-06,
"loss": 8.0954,
"step": 330,
"task_loss": 3.2958412170410156
},
{
"compression_loss": 0.0,
"distillation_loss": 7.721191883087158,
"epoch": 0.12,
"learning_rate": 7.285869172388869e-06,
"loss": 7.3317,
"step": 340,
"task_loss": 3.191631317138672
},
{
"compression_loss": 0.0,
"distillation_loss": 7.107563495635986,
"epoch": 0.13,
"learning_rate": 7.5027105168052046e-06,
"loss": 7.0177,
"step": 350,
"task_loss": 3.102968215942383
},
{
"compression_loss": 0.0,
"distillation_loss": 7.762242794036865,
"epoch": 0.13,
"learning_rate": 7.71955186122154e-06,
"loss": 6.7813,
"step": 360,
"task_loss": 4.048648834228516
},
{
"compression_loss": 0.0,
"distillation_loss": 6.884188652038574,
"epoch": 0.13,
"learning_rate": 7.936393205637875e-06,
"loss": 6.6125,
"step": 370,
"task_loss": 2.772972583770752
},
{
"compression_loss": 0.0,
"distillation_loss": 6.817141056060791,
"epoch": 0.14,
"learning_rate": 8.15323455005421e-06,
"loss": 6.1752,
"step": 380,
"task_loss": 3.29545259475708
},
{
"compression_loss": 0.0,
"distillation_loss": 6.7478532791137695,
"epoch": 0.14,
"learning_rate": 8.370075894470546e-06,
"loss": 5.9283,
"step": 390,
"task_loss": 2.940948486328125
},
{
"compression_loss": 0.0,
"distillation_loss": 6.400359153747559,
"epoch": 0.14,
"learning_rate": 8.586917238886882e-06,
"loss": 5.7384,
"step": 400,
"task_loss": 3.4309067726135254
},
{
"compression_loss": 0.0,
"distillation_loss": 5.582571983337402,
"epoch": 0.15,
"learning_rate": 8.803758583303217e-06,
"loss": 5.6473,
"step": 410,
"task_loss": 2.701523780822754
},
{
"compression_loss": 0.0,
"distillation_loss": 5.448908805847168,
"epoch": 0.15,
"learning_rate": 9.020599927719551e-06,
"loss": 5.7486,
"step": 420,
"task_loss": 3.1738014221191406
},
{
"compression_loss": 0.0,
"distillation_loss": 6.087796211242676,
"epoch": 0.16,
"learning_rate": 9.237441272135887e-06,
"loss": 5.4961,
"step": 430,
"task_loss": 3.124908447265625
},
{
"compression_loss": 0.0,
"distillation_loss": 5.375993728637695,
"epoch": 0.16,
"learning_rate": 9.454282616552222e-06,
"loss": 5.2309,
"step": 440,
"task_loss": 2.6864736080169678
},
{
"compression_loss": 0.0,
"distillation_loss": 5.898089408874512,
"epoch": 0.16,
"learning_rate": 9.671123960968558e-06,
"loss": 5.3107,
"step": 450,
"task_loss": 2.6333131790161133
},
{
"compression_loss": 0.0,
"distillation_loss": 5.690750598907471,
"epoch": 0.17,
"learning_rate": 9.887965305384893e-06,
"loss": 5.0992,
"step": 460,
"task_loss": 2.886949062347412
},
{
"compression_loss": 0.0,
"distillation_loss": 5.3047871589660645,
"epoch": 0.17,
"learning_rate": 1.010480664980123e-05,
"loss": 5.1496,
"step": 470,
"task_loss": 2.6398158073425293
},
{
"compression_loss": 0.0,
"distillation_loss": 5.096426010131836,
"epoch": 0.17,
"learning_rate": 1.0321647994217564e-05,
"loss": 4.888,
"step": 480,
"task_loss": 3.002800941467285
},
{
"compression_loss": 0.0,
"distillation_loss": 5.322364807128906,
"epoch": 0.18,
"learning_rate": 1.05384893386339e-05,
"loss": 4.926,
"step": 490,
"task_loss": 2.943061590194702
},
{
"compression_loss": 0.0,
"distillation_loss": 5.079683303833008,
"epoch": 0.18,
"learning_rate": 1.0755330683050236e-05,
"loss": 4.7843,
"step": 500,
"task_loss": 2.4856796264648438
},
{
"epoch": 0.18,
"eval_exact_match": 47.6631977294229,
"eval_f1": 61.84689508471956,
"step": 500
},
{
"compression_loss": 0.0,
"distillation_loss": 4.29311466217041,
"epoch": 0.18,
"learning_rate": 1.0972172027466571e-05,
"loss": 4.6627,
"step": 510,
"task_loss": 2.2312231063842773
},
{
"compression_loss": 0.0,
"distillation_loss": 4.374029159545898,
"epoch": 0.19,
"learning_rate": 1.1189013371882907e-05,
"loss": 4.7554,
"step": 520,
"task_loss": 1.571474313735962
},
{
"compression_loss": 0.0,
"distillation_loss": 5.325689315795898,
"epoch": 0.19,
"learning_rate": 1.1405854716299242e-05,
"loss": 4.5999,
"step": 530,
"task_loss": 2.791367530822754
},
{
"compression_loss": 0.0,
"distillation_loss": 4.6214494705200195,
"epoch": 0.2,
"learning_rate": 1.1622696060715576e-05,
"loss": 4.3316,
"step": 540,
"task_loss": 2.6126604080200195
},
{
"compression_loss": 0.0,
"distillation_loss": 4.899660110473633,
"epoch": 0.2,
"learning_rate": 1.1839537405131912e-05,
"loss": 4.5563,
"step": 550,
"task_loss": 2.572834014892578
},
{
"compression_loss": 0.0,
"distillation_loss": 4.496711730957031,
"epoch": 0.2,
"learning_rate": 1.2056378749548247e-05,
"loss": 4.277,
"step": 560,
"task_loss": 2.6678249835968018
},
{
"compression_loss": 0.0,
"distillation_loss": 4.555987358093262,
"epoch": 0.21,
"learning_rate": 1.2273220093964583e-05,
"loss": 4.3194,
"step": 570,
"task_loss": 2.6166036128997803
},
{
"compression_loss": 0.0,
"distillation_loss": 4.9610090255737305,
"epoch": 0.21,
"learning_rate": 1.2490061438380918e-05,
"loss": 4.2434,
"step": 580,
"task_loss": 2.6043477058410645
},
{
"compression_loss": 0.0,
"distillation_loss": 4.213617324829102,
"epoch": 0.21,
"learning_rate": 1.2706902782797254e-05,
"loss": 4.273,
"step": 590,
"task_loss": 1.856445550918579
},
{
"compression_loss": 0.0,
"distillation_loss": 4.79631233215332,
"epoch": 0.22,
"learning_rate": 1.2923744127213588e-05,
"loss": 4.3605,
"step": 600,
"task_loss": 2.3547120094299316
},
{
"compression_loss": 0.0,
"distillation_loss": 4.130844593048096,
"epoch": 0.22,
"learning_rate": 1.3140585471629924e-05,
"loss": 4.1307,
"step": 610,
"task_loss": 2.737905979156494
},
{
"compression_loss": 0.0,
"distillation_loss": 4.68433952331543,
"epoch": 0.22,
"learning_rate": 1.3357426816046259e-05,
"loss": 4.228,
"step": 620,
"task_loss": 2.0046231746673584
},
{
"compression_loss": 0.0,
"distillation_loss": 3.9693734645843506,
"epoch": 0.23,
"learning_rate": 1.3574268160462595e-05,
"loss": 4.0317,
"step": 630,
"task_loss": 1.786435604095459
},
{
"compression_loss": 0.0,
"distillation_loss": 4.474964618682861,
"epoch": 0.23,
"learning_rate": 1.379110950487893e-05,
"loss": 3.9845,
"step": 640,
"task_loss": 2.3881354331970215
},
{
"compression_loss": 0.0,
"distillation_loss": 4.220601558685303,
"epoch": 0.23,
"learning_rate": 1.4007950849295268e-05,
"loss": 4.126,
"step": 650,
"task_loss": 2.3482580184936523
},
{
"compression_loss": 0.0,
"distillation_loss": 4.762381553649902,
"epoch": 0.24,
"learning_rate": 1.4224792193711601e-05,
"loss": 3.9768,
"step": 660,
"task_loss": 2.4734535217285156
},
{
"compression_loss": 0.0,
"distillation_loss": 3.62750244140625,
"epoch": 0.24,
"learning_rate": 1.4441633538127937e-05,
"loss": 3.7916,
"step": 670,
"task_loss": 1.8247270584106445
},
{
"compression_loss": 0.0,
"distillation_loss": 3.762209415435791,
"epoch": 0.25,
"learning_rate": 1.4658474882544273e-05,
"loss": 3.7097,
"step": 680,
"task_loss": 1.700040578842163
},
{
"compression_loss": 0.0,
"distillation_loss": 3.5018386840820312,
"epoch": 0.25,
"learning_rate": 1.4875316226960608e-05,
"loss": 3.5434,
"step": 690,
"task_loss": 1.8874092102050781
},
{
"compression_loss": 0.0,
"distillation_loss": 3.7156856060028076,
"epoch": 0.25,
"learning_rate": 1.5092157571376944e-05,
"loss": 3.6905,
"step": 700,
"task_loss": 1.9769742488861084
},
{
"compression_loss": 0.0,
"distillation_loss": 3.2249040603637695,
"epoch": 0.26,
"learning_rate": 1.530899891579328e-05,
"loss": 3.6165,
"step": 710,
"task_loss": 1.8299891948699951
},
{
"compression_loss": 0.0,
"distillation_loss": 3.8128280639648438,
"epoch": 0.26,
"learning_rate": 1.5525840260209615e-05,
"loss": 3.6727,
"step": 720,
"task_loss": 1.8565778732299805
},
{
"compression_loss": 0.0,
"distillation_loss": 3.894437789916992,
"epoch": 0.26,
"learning_rate": 1.574268160462595e-05,
"loss": 3.8419,
"step": 730,
"task_loss": 1.9490731954574585
},
{
"compression_loss": 0.0,
"distillation_loss": 3.6588613986968994,
"epoch": 0.27,
"learning_rate": 1.5959522949042286e-05,
"loss": 3.719,
"step": 740,
"task_loss": 2.055631399154663
},
{
"compression_loss": 0.0,
"distillation_loss": 3.815298557281494,
"epoch": 0.27,
"learning_rate": 1.6176364293458618e-05,
"loss": 3.5813,
"step": 750,
"task_loss": 1.935587763786316
},
{
"epoch": 0.27,
"eval_exact_match": 58.55250709555345,
"eval_f1": 70.94607775185224,
"step": 750
},
{
"compression_loss": 0.0,
"distillation_loss": 4.76840877532959,
"epoch": 0.27,
"learning_rate": 1.6393205637874954e-05,
"loss": 3.5721,
"step": 760,
"task_loss": 2.5942468643188477
},
{
"compression_loss": 0.0,
"distillation_loss": 3.8197884559631348,
"epoch": 0.28,
"learning_rate": 1.661004698229129e-05,
"loss": 3.406,
"step": 770,
"task_loss": 2.399030923843384
},
{
"compression_loss": 0.0,
"distillation_loss": 3.773465633392334,
"epoch": 0.28,
"learning_rate": 1.6826888326707625e-05,
"loss": 3.6862,
"step": 780,
"task_loss": 2.0786447525024414
},
{
"compression_loss": 0.0,
"distillation_loss": 3.5512259006500244,
"epoch": 0.29,
"learning_rate": 1.704372967112396e-05,
"loss": 3.4023,
"step": 790,
"task_loss": 1.7031829357147217
},
{
"compression_loss": 0.0,
"distillation_loss": 4.154162406921387,
"epoch": 0.29,
"learning_rate": 1.7260571015540296e-05,
"loss": 3.5814,
"step": 800,
"task_loss": 2.033867597579956
},
{
"compression_loss": 0.0,
"distillation_loss": 3.1742305755615234,
"epoch": 0.29,
"learning_rate": 1.747741235995663e-05,
"loss": 3.5524,
"step": 810,
"task_loss": 1.7235503196716309
},
{
"compression_loss": 0.0,
"distillation_loss": 3.667600154876709,
"epoch": 0.3,
"learning_rate": 1.7694253704372967e-05,
"loss": 3.1374,
"step": 820,
"task_loss": 2.0208334922790527
},
{
"compression_loss": 0.0,
"distillation_loss": 3.541199207305908,
"epoch": 0.3,
"learning_rate": 1.7911095048789303e-05,
"loss": 3.5502,
"step": 830,
"task_loss": 2.0909945964813232
},
{
"compression_loss": 0.0,
"distillation_loss": 3.158015012741089,
"epoch": 0.3,
"learning_rate": 1.812793639320564e-05,
"loss": 3.1089,
"step": 840,
"task_loss": 1.5247281789779663
},
{
"compression_loss": 0.0,
"distillation_loss": 3.0832254886627197,
"epoch": 0.31,
"learning_rate": 1.8344777737621974e-05,
"loss": 3.3842,
"step": 850,
"task_loss": 1.5567718744277954
},
{
"compression_loss": 0.0,
"distillation_loss": 3.231353521347046,
"epoch": 0.31,
"learning_rate": 1.856161908203831e-05,
"loss": 3.2243,
"step": 860,
"task_loss": 2.2011852264404297
},
{
"compression_loss": 0.0,
"distillation_loss": 3.409111261367798,
"epoch": 0.31,
"learning_rate": 1.8778460426454645e-05,
"loss": 3.1738,
"step": 870,
"task_loss": 1.6380929946899414
},
{
"compression_loss": 0.0,
"distillation_loss": 2.968132495880127,
"epoch": 0.32,
"learning_rate": 1.8995301770870977e-05,
"loss": 3.184,
"step": 880,
"task_loss": 1.0741721391677856
},
{
"compression_loss": 0.0,
"distillation_loss": 3.7900476455688477,
"epoch": 0.32,
"learning_rate": 1.9212143115287313e-05,
"loss": 3.3852,
"step": 890,
"task_loss": 2.300794839859009
},
{
"compression_loss": 0.0,
"distillation_loss": 3.6072299480438232,
"epoch": 0.33,
"learning_rate": 1.942898445970365e-05,
"loss": 3.0648,
"step": 900,
"task_loss": 1.9764083623886108
},
{
"compression_loss": 0.0,
"distillation_loss": 3.560149908065796,
"epoch": 0.33,
"learning_rate": 1.9645825804119984e-05,
"loss": 3.1941,
"step": 910,
"task_loss": 2.037846088409424
},
{
"compression_loss": 0.0,
"distillation_loss": 3.539365768432617,
"epoch": 0.33,
"learning_rate": 1.986266714853632e-05,
"loss": 3.1535,
"step": 920,
"task_loss": 1.5650080442428589
},
{
"compression_loss": 0.0,
"distillation_loss": 3.952303409576416,
"epoch": 0.34,
"learning_rate": 2.007950849295266e-05,
"loss": 3.0291,
"step": 930,
"task_loss": 2.1716508865356445
},
{
"compression_loss": 0.0,
"distillation_loss": 4.5406928062438965,
"epoch": 0.34,
"learning_rate": 2.0296349837368994e-05,
"loss": 3.2595,
"step": 940,
"task_loss": 2.8979098796844482
},
{
"compression_loss": 0.0,
"distillation_loss": 3.1165611743927,
"epoch": 0.34,
"learning_rate": 2.051319118178533e-05,
"loss": 3.0601,
"step": 950,
"task_loss": 1.5383003950119019
},
{
"compression_loss": 0.0,
"distillation_loss": 3.121710777282715,
"epoch": 0.35,
"learning_rate": 2.0730032526201665e-05,
"loss": 3.1225,
"step": 960,
"task_loss": 1.6968241930007935
},
{
"compression_loss": 0.0,
"distillation_loss": 3.3333802223205566,
"epoch": 0.35,
"learning_rate": 2.0946873870618e-05,
"loss": 3.0869,
"step": 970,
"task_loss": 2.1477713584899902
},
{
"compression_loss": 0.0,
"distillation_loss": 3.538954257965088,
"epoch": 0.35,
"learning_rate": 2.1163715215034336e-05,
"loss": 2.8604,
"step": 980,
"task_loss": 1.5192737579345703
},
{
"compression_loss": 0.0,
"distillation_loss": 2.781994581222534,
"epoch": 0.36,
"learning_rate": 2.1380556559450672e-05,
"loss": 2.6585,
"step": 990,
"task_loss": 1.9368581771850586
},
{
"compression_loss": 0.0,
"distillation_loss": 3.892576217651367,
"epoch": 0.36,
"learning_rate": 2.1597397903867004e-05,
"loss": 3.2002,
"step": 1000,
"task_loss": 2.37716007232666
},
{
"epoch": 0.36,
"eval_exact_match": 64.34247871333964,
"eval_f1": 76.12832427154721,
"step": 1000
},
{
"compression_loss": 0.0,
"distillation_loss": 3.0241000652313232,
"epoch": 0.37,
"learning_rate": 2.181423924828334e-05,
"loss": 3.118,
"step": 1010,
"task_loss": 1.4347963333129883
},
{
"compression_loss": 0.0,
"distillation_loss": 3.0104122161865234,
"epoch": 0.37,
"learning_rate": 2.2031080592699675e-05,
"loss": 2.8552,
"step": 1020,
"task_loss": 1.7555286884307861
},
{
"compression_loss": 0.0,
"distillation_loss": 2.568011999130249,
"epoch": 0.37,
"learning_rate": 2.224792193711601e-05,
"loss": 2.918,
"step": 1030,
"task_loss": 1.1540263891220093
},
{
"compression_loss": 0.0,
"distillation_loss": 3.7906365394592285,
"epoch": 0.38,
"learning_rate": 2.2464763281532346e-05,
"loss": 3.0068,
"step": 1040,
"task_loss": 2.4095299243927
},
{
"compression_loss": 0.0,
"distillation_loss": 2.682779312133789,
"epoch": 0.38,
"learning_rate": 2.2681604625948682e-05,
"loss": 2.6718,
"step": 1050,
"task_loss": 1.969969391822815
},
{
"compression_loss": 0.0,
"distillation_loss": 2.5220847129821777,
"epoch": 0.38,
"learning_rate": 2.2898445970365018e-05,
"loss": 2.6947,
"step": 1060,
"task_loss": 1.0111178159713745
},
{
"compression_loss": 0.0,
"distillation_loss": 2.6033337116241455,
"epoch": 0.39,
"learning_rate": 2.3115287314781353e-05,
"loss": 2.8889,
"step": 1070,
"task_loss": 1.8982610702514648
},
{
"compression_loss": 0.0,
"distillation_loss": 3.147583484649658,
"epoch": 0.39,
"learning_rate": 2.333212865919769e-05,
"loss": 2.8495,
"step": 1080,
"task_loss": 1.5683348178863525
},
{
"compression_loss": 0.0,
"distillation_loss": 2.5625667572021484,
"epoch": 0.39,
"learning_rate": 2.3548970003614024e-05,
"loss": 2.9222,
"step": 1090,
"task_loss": 1.891990065574646
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2590126991271973,
"epoch": 0.4,
"learning_rate": 2.376581134803036e-05,
"loss": 2.8199,
"step": 1100,
"task_loss": 1.4266440868377686
},
{
"compression_loss": 0.0,
"distillation_loss": 3.4584269523620605,
"epoch": 0.4,
"learning_rate": 2.3982652692446695e-05,
"loss": 2.856,
"step": 1110,
"task_loss": 1.945828914642334
},
{
"compression_loss": 0.0,
"distillation_loss": 2.8447415828704834,
"epoch": 0.4,
"learning_rate": 2.4199494036863028e-05,
"loss": 2.5436,
"step": 1120,
"task_loss": 1.3307209014892578
},
{
"compression_loss": 0.0,
"distillation_loss": 2.670109510421753,
"epoch": 0.41,
"learning_rate": 2.4416335381279363e-05,
"loss": 2.7268,
"step": 1130,
"task_loss": 1.9327623844146729
},
{
"compression_loss": 0.0,
"distillation_loss": 2.741763114929199,
"epoch": 0.41,
"learning_rate": 2.46331767256957e-05,
"loss": 2.8081,
"step": 1140,
"task_loss": 1.631276249885559
},
{
"compression_loss": 0.0,
"distillation_loss": 2.837188482284546,
"epoch": 0.42,
"learning_rate": 2.4850018070112034e-05,
"loss": 2.6292,
"step": 1150,
"task_loss": 1.422114372253418
},
{
"compression_loss": 0.0,
"distillation_loss": 3.2892446517944336,
"epoch": 0.42,
"learning_rate": 2.506685941452837e-05,
"loss": 2.7805,
"step": 1160,
"task_loss": 1.5163651704788208
},
{
"compression_loss": 0.0,
"distillation_loss": 2.5814714431762695,
"epoch": 0.42,
"learning_rate": 2.5283700758944705e-05,
"loss": 2.7588,
"step": 1170,
"task_loss": 1.4490866661071777
},
{
"compression_loss": 0.0,
"distillation_loss": 3.4073421955108643,
"epoch": 0.43,
"learning_rate": 2.550054210336104e-05,
"loss": 2.5651,
"step": 1180,
"task_loss": 1.9651927947998047
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1154234409332275,
"epoch": 0.43,
"learning_rate": 2.5717383447777377e-05,
"loss": 2.5451,
"step": 1190,
"task_loss": 1.5143002271652222
},
{
"compression_loss": 0.0,
"distillation_loss": 2.7987661361694336,
"epoch": 0.43,
"learning_rate": 2.5934224792193712e-05,
"loss": 2.6821,
"step": 1200,
"task_loss": 1.6177781820297241
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1852757930755615,
"epoch": 0.44,
"learning_rate": 2.6151066136610048e-05,
"loss": 2.4611,
"step": 1210,
"task_loss": 1.2211247682571411
},
{
"compression_loss": 0.0,
"distillation_loss": 2.9138824939727783,
"epoch": 0.44,
"learning_rate": 2.6367907481026383e-05,
"loss": 2.4774,
"step": 1220,
"task_loss": 1.7106552124023438
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2626287937164307,
"epoch": 0.44,
"learning_rate": 2.658474882544272e-05,
"loss": 2.5318,
"step": 1230,
"task_loss": 1.307836890220642
},
{
"compression_loss": 0.0,
"distillation_loss": 2.6250033378601074,
"epoch": 0.45,
"learning_rate": 2.680159016985905e-05,
"loss": 2.5726,
"step": 1240,
"task_loss": 1.559349536895752
},
{
"compression_loss": 0.0,
"distillation_loss": 2.5571208000183105,
"epoch": 0.45,
"learning_rate": 2.7018431514275387e-05,
"loss": 2.489,
"step": 1250,
"task_loss": 1.1737449169158936
},
{
"epoch": 0.45,
"eval_exact_match": 67.50236518448439,
"eval_f1": 78.53072236477003,
"step": 1250
},
{
"compression_loss": 0.0,
"distillation_loss": 3.1527514457702637,
"epoch": 0.46,
"learning_rate": 2.7235272858691722e-05,
"loss": 2.6094,
"step": 1260,
"task_loss": 1.478395700454712
},
{
"compression_loss": 0.0,
"distillation_loss": 3.001819610595703,
"epoch": 0.46,
"learning_rate": 2.7452114203108058e-05,
"loss": 2.7699,
"step": 1270,
"task_loss": 1.6132569313049316
},
{
"compression_loss": 0.0,
"distillation_loss": 2.6055197715759277,
"epoch": 0.46,
"learning_rate": 2.7668955547524393e-05,
"loss": 2.4624,
"step": 1280,
"task_loss": 1.5703206062316895
},
{
"compression_loss": 0.0,
"distillation_loss": 2.221977472305298,
"epoch": 0.47,
"learning_rate": 2.788579689194073e-05,
"loss": 2.4796,
"step": 1290,
"task_loss": 1.2309448719024658
},
{
"compression_loss": 0.0,
"distillation_loss": 2.17022705078125,
"epoch": 0.47,
"learning_rate": 2.8102638236357068e-05,
"loss": 2.4461,
"step": 1300,
"task_loss": 1.4810001850128174
},
{
"compression_loss": 0.0,
"distillation_loss": 3.275075912475586,
"epoch": 0.47,
"learning_rate": 2.8319479580773403e-05,
"loss": 2.6478,
"step": 1310,
"task_loss": 2.2076168060302734
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1733217239379883,
"epoch": 0.48,
"learning_rate": 2.853632092518974e-05,
"loss": 2.3909,
"step": 1320,
"task_loss": 1.4496859312057495
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3826065063476562,
"epoch": 0.48,
"learning_rate": 2.8753162269606075e-05,
"loss": 2.3423,
"step": 1330,
"task_loss": 1.4332823753356934
},
{
"compression_loss": 0.0,
"distillation_loss": 2.4819350242614746,
"epoch": 0.48,
"learning_rate": 2.897000361402241e-05,
"loss": 2.3712,
"step": 1340,
"task_loss": 1.7626259326934814
},
{
"compression_loss": 0.0,
"distillation_loss": 3.2148211002349854,
"epoch": 0.49,
"learning_rate": 2.9186844958438746e-05,
"loss": 2.3867,
"step": 1350,
"task_loss": 1.9342608451843262
},
{
"compression_loss": 0.0,
"distillation_loss": 2.4345703125,
"epoch": 0.49,
"learning_rate": 2.9403686302855078e-05,
"loss": 2.3923,
"step": 1360,
"task_loss": 1.5897960662841797
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2300848960876465,
"epoch": 0.5,
"learning_rate": 2.9620527647271414e-05,
"loss": 2.225,
"step": 1370,
"task_loss": 1.2050740718841553
},
{
"compression_loss": 0.0,
"distillation_loss": 2.686323881149292,
"epoch": 0.5,
"learning_rate": 2.983736899168775e-05,
"loss": 2.3038,
"step": 1380,
"task_loss": 1.9944977760314941
},
{
"compression_loss": 0.0,
"distillation_loss": 2.4140007495880127,
"epoch": 0.5,
"learning_rate": 3.005421033610408e-05,
"loss": 2.3103,
"step": 1390,
"task_loss": 1.4956495761871338
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1493027210235596,
"epoch": 0.51,
"learning_rate": 3.027105168052042e-05,
"loss": 2.278,
"step": 1400,
"task_loss": 1.2530300617218018
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3148369789123535,
"epoch": 0.51,
"learning_rate": 3.0487893024936752e-05,
"loss": 2.0819,
"step": 1410,
"task_loss": 1.466301441192627
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3211307525634766,
"epoch": 0.51,
"learning_rate": 3.070473436935309e-05,
"loss": 2.2652,
"step": 1420,
"task_loss": 1.6483142375946045
},
{
"compression_loss": 0.0,
"distillation_loss": 2.6457409858703613,
"epoch": 0.52,
"learning_rate": 3.092157571376942e-05,
"loss": 2.425,
"step": 1430,
"task_loss": 1.5462552309036255
},
{
"compression_loss": 0.0,
"distillation_loss": 2.4564802646636963,
"epoch": 0.52,
"learning_rate": 3.113841705818576e-05,
"loss": 2.2011,
"step": 1440,
"task_loss": 1.7669343948364258
},
{
"compression_loss": 0.0,
"distillation_loss": 2.213484764099121,
"epoch": 0.52,
"learning_rate": 3.135525840260209e-05,
"loss": 2.3324,
"step": 1450,
"task_loss": 1.2010239362716675
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1059863567352295,
"epoch": 0.53,
"learning_rate": 3.1572099747018434e-05,
"loss": 2.3631,
"step": 1460,
"task_loss": 1.0316790342330933
},
{
"compression_loss": 0.0,
"distillation_loss": 2.007906913757324,
"epoch": 0.53,
"learning_rate": 3.178894109143476e-05,
"loss": 2.2032,
"step": 1470,
"task_loss": 1.1084401607513428
},
{
"compression_loss": 0.0,
"distillation_loss": 2.6057040691375732,
"epoch": 0.53,
"learning_rate": 3.2005782435851105e-05,
"loss": 2.3162,
"step": 1480,
"task_loss": 1.4449225664138794
},
{
"compression_loss": 0.0,
"distillation_loss": 2.419894218444824,
"epoch": 0.54,
"learning_rate": 3.222262378026744e-05,
"loss": 2.1313,
"step": 1490,
"task_loss": 1.390395164489746
},
{
"compression_loss": 0.0,
"distillation_loss": 2.433657646179199,
"epoch": 0.54,
"learning_rate": 3.2439465124683776e-05,
"loss": 2.3071,
"step": 1500,
"task_loss": 1.726881504058838
},
{
"epoch": 0.54,
"eval_exact_match": 70.34058656575213,
"eval_f1": 81.00798952513871,
"step": 1500
},
{
"compression_loss": 0.0,
"distillation_loss": 1.9914134740829468,
"epoch": 0.55,
"learning_rate": 3.265630646910011e-05,
"loss": 2.4203,
"step": 1510,
"task_loss": 1.3354167938232422
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2558791637420654,
"epoch": 0.55,
"learning_rate": 3.287314781351645e-05,
"loss": 2.3477,
"step": 1520,
"task_loss": 1.4666078090667725
},
{
"compression_loss": 0.0,
"distillation_loss": 2.4174277782440186,
"epoch": 0.55,
"learning_rate": 3.308998915793278e-05,
"loss": 2.226,
"step": 1530,
"task_loss": 1.690213918685913
},
{
"compression_loss": 0.0,
"distillation_loss": 2.663379430770874,
"epoch": 0.56,
"learning_rate": 3.330683050234912e-05,
"loss": 2.2701,
"step": 1540,
"task_loss": 1.8850646018981934
},
{
"compression_loss": 0.0,
"distillation_loss": 2.476119041442871,
"epoch": 0.56,
"learning_rate": 3.3523671846765454e-05,
"loss": 2.2315,
"step": 1550,
"task_loss": 1.7790265083312988
},
{
"compression_loss": 0.0,
"distillation_loss": 2.5638389587402344,
"epoch": 0.56,
"learning_rate": 3.374051319118178e-05,
"loss": 2.2065,
"step": 1560,
"task_loss": 2.4445137977600098
},
{
"compression_loss": 0.0,
"distillation_loss": 2.102419376373291,
"epoch": 0.57,
"learning_rate": 3.3957354535598125e-05,
"loss": 2.0694,
"step": 1570,
"task_loss": 0.849961519241333
},
{
"compression_loss": 0.0,
"distillation_loss": 2.377769947052002,
"epoch": 0.57,
"learning_rate": 3.4174195880014454e-05,
"loss": 2.3494,
"step": 1580,
"task_loss": 1.6015727519989014
},
{
"compression_loss": 0.0,
"distillation_loss": 2.992417812347412,
"epoch": 0.57,
"learning_rate": 3.4391037224430796e-05,
"loss": 2.2862,
"step": 1590,
"task_loss": 1.7169113159179688
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1633100509643555,
"epoch": 0.58,
"learning_rate": 3.4607878568847125e-05,
"loss": 2.2645,
"step": 1600,
"task_loss": 1.168750524520874
},
{
"compression_loss": 0.0,
"distillation_loss": 1.846243977546692,
"epoch": 0.58,
"learning_rate": 3.482471991326347e-05,
"loss": 2.1987,
"step": 1610,
"task_loss": 1.0060114860534668
},
{
"compression_loss": 0.0,
"distillation_loss": 1.866645097732544,
"epoch": 0.59,
"learning_rate": 3.5041561257679796e-05,
"loss": 2.1537,
"step": 1620,
"task_loss": 1.2443784475326538
},
{
"compression_loss": 0.0,
"distillation_loss": 2.573518753051758,
"epoch": 0.59,
"learning_rate": 3.525840260209614e-05,
"loss": 2.126,
"step": 1630,
"task_loss": 2.0661087036132812
},
{
"compression_loss": 0.0,
"distillation_loss": 2.5710911750793457,
"epoch": 0.59,
"learning_rate": 3.547524394651247e-05,
"loss": 2.2293,
"step": 1640,
"task_loss": 2.133748769760132
},
{
"compression_loss": 0.0,
"distillation_loss": 1.9118821620941162,
"epoch": 0.6,
"learning_rate": 3.569208529092881e-05,
"loss": 2.1774,
"step": 1650,
"task_loss": 0.9966872334480286
},
{
"compression_loss": 0.0,
"distillation_loss": 2.572476863861084,
"epoch": 0.6,
"learning_rate": 3.590892663534514e-05,
"loss": 2.057,
"step": 1660,
"task_loss": 1.7666373252868652
},
{
"compression_loss": 0.0,
"distillation_loss": 2.8772165775299072,
"epoch": 0.6,
"learning_rate": 3.6125767979761474e-05,
"loss": 2.1655,
"step": 1670,
"task_loss": 1.931375503540039
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8401541709899902,
"epoch": 0.61,
"learning_rate": 3.634260932417781e-05,
"loss": 1.9505,
"step": 1680,
"task_loss": 1.2190885543823242
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8582175970077515,
"epoch": 0.61,
"learning_rate": 3.6559450668594145e-05,
"loss": 2.076,
"step": 1690,
"task_loss": 1.3368051052093506
},
{
"compression_loss": 0.0,
"distillation_loss": 2.614755153656006,
"epoch": 0.61,
"learning_rate": 3.677629201301048e-05,
"loss": 2.3466,
"step": 1700,
"task_loss": 1.8578553199768066
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2779784202575684,
"epoch": 0.62,
"learning_rate": 3.6993133357426816e-05,
"loss": 2.1923,
"step": 1710,
"task_loss": 1.5653705596923828
},
{
"compression_loss": 0.0,
"distillation_loss": 2.77517032623291,
"epoch": 0.62,
"learning_rate": 3.720997470184315e-05,
"loss": 2.0672,
"step": 1720,
"task_loss": 1.7407598495483398
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3437767028808594,
"epoch": 0.63,
"learning_rate": 3.742681604625949e-05,
"loss": 2.2019,
"step": 1730,
"task_loss": 1.8152300119400024
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2300639152526855,
"epoch": 0.63,
"learning_rate": 3.764365739067582e-05,
"loss": 1.9345,
"step": 1740,
"task_loss": 1.5370993614196777
},
{
"compression_loss": 0.0,
"distillation_loss": 2.157289981842041,
"epoch": 0.63,
"learning_rate": 3.786049873509216e-05,
"loss": 1.8016,
"step": 1750,
"task_loss": 1.4095211029052734
},
{
"epoch": 0.63,
"eval_exact_match": 71.9205298013245,
"eval_f1": 82.04179007964369,
"step": 1750
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6568679809570312,
"epoch": 0.64,
"learning_rate": 3.8077340079508494e-05,
"loss": 2.1419,
"step": 1760,
"task_loss": 1.0038526058197021
},
{
"compression_loss": 0.0,
"distillation_loss": 2.557297706604004,
"epoch": 0.64,
"learning_rate": 3.829418142392483e-05,
"loss": 1.8646,
"step": 1770,
"task_loss": 1.6112699508666992
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2386903762817383,
"epoch": 0.64,
"learning_rate": 3.8511022768341165e-05,
"loss": 2.1456,
"step": 1780,
"task_loss": 1.4335222244262695
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2769885063171387,
"epoch": 0.65,
"learning_rate": 3.87278641127575e-05,
"loss": 2.0989,
"step": 1790,
"task_loss": 1.8141961097717285
},
{
"compression_loss": 0.0,
"distillation_loss": 2.8742246627807617,
"epoch": 0.65,
"learning_rate": 3.894470545717383e-05,
"loss": 2.258,
"step": 1800,
"task_loss": 1.5317169427871704
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2876062393188477,
"epoch": 0.65,
"learning_rate": 3.916154680159017e-05,
"loss": 1.9455,
"step": 1810,
"task_loss": 1.7473857402801514
},
{
"compression_loss": 0.0,
"distillation_loss": 2.066962718963623,
"epoch": 0.66,
"learning_rate": 3.93783881460065e-05,
"loss": 1.8893,
"step": 1820,
"task_loss": 1.0266292095184326
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6544253826141357,
"epoch": 0.66,
"learning_rate": 3.959522949042284e-05,
"loss": 2.0365,
"step": 1830,
"task_loss": 1.080996036529541
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3459901809692383,
"epoch": 0.66,
"learning_rate": 3.981207083483917e-05,
"loss": 1.9773,
"step": 1840,
"task_loss": 1.5108176469802856
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3785009384155273,
"epoch": 0.67,
"learning_rate": 4.0028912179255514e-05,
"loss": 2.0769,
"step": 1850,
"task_loss": 2.11570405960083
},
{
"compression_loss": 0.0,
"distillation_loss": 2.0890772342681885,
"epoch": 0.67,
"learning_rate": 4.024575352367185e-05,
"loss": 1.9434,
"step": 1860,
"task_loss": 1.0435796976089478
},
{
"compression_loss": 0.0,
"distillation_loss": 1.71674382686615,
"epoch": 0.68,
"learning_rate": 4.0462594868088185e-05,
"loss": 1.9389,
"step": 1870,
"task_loss": 1.1821942329406738
},
{
"compression_loss": 0.0,
"distillation_loss": 2.101773500442505,
"epoch": 0.68,
"learning_rate": 4.067943621250452e-05,
"loss": 2.0819,
"step": 1880,
"task_loss": 1.3899006843566895
},
{
"compression_loss": 0.0,
"distillation_loss": 2.432915687561035,
"epoch": 0.68,
"learning_rate": 4.0896277556920857e-05,
"loss": 1.9742,
"step": 1890,
"task_loss": 1.5817327499389648
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6859402656555176,
"epoch": 0.69,
"learning_rate": 4.111311890133719e-05,
"loss": 2.0378,
"step": 1900,
"task_loss": 0.9370235800743103
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6273126602172852,
"epoch": 0.69,
"learning_rate": 4.132996024575352e-05,
"loss": 1.9215,
"step": 1910,
"task_loss": 1.2056360244750977
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7799351215362549,
"epoch": 0.69,
"learning_rate": 4.154680159016986e-05,
"loss": 1.9668,
"step": 1920,
"task_loss": 0.8631846904754639
},
{
"compression_loss": 0.0,
"distillation_loss": 1.9689221382141113,
"epoch": 0.7,
"learning_rate": 4.176364293458619e-05,
"loss": 1.9988,
"step": 1930,
"task_loss": 1.3810503482818604
},
{
"compression_loss": 0.0,
"distillation_loss": 1.826797366142273,
"epoch": 0.7,
"learning_rate": 4.1980484279002534e-05,
"loss": 2.0241,
"step": 1940,
"task_loss": 1.1184316873550415
},
{
"compression_loss": 0.0,
"distillation_loss": 2.8849117755889893,
"epoch": 0.7,
"learning_rate": 4.219732562341886e-05,
"loss": 2.0848,
"step": 1950,
"task_loss": 1.642095923423767
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6249830722808838,
"epoch": 0.71,
"learning_rate": 4.2414166967835206e-05,
"loss": 1.9932,
"step": 1960,
"task_loss": 1.0573899745941162
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3523120880126953,
"epoch": 0.71,
"learning_rate": 4.2631008312251534e-05,
"loss": 1.7783,
"step": 1970,
"task_loss": 0.67606520652771
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1321749687194824,
"epoch": 0.72,
"learning_rate": 4.284784965666788e-05,
"loss": 1.7797,
"step": 1980,
"task_loss": 1.814510464668274
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4253501892089844,
"epoch": 0.72,
"learning_rate": 4.3064691001084205e-05,
"loss": 1.9431,
"step": 1990,
"task_loss": 0.946668803691864
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3535895347595215,
"epoch": 0.72,
"learning_rate": 4.328153234550055e-05,
"loss": 1.9957,
"step": 2000,
"task_loss": 1.8486053943634033
},
{
"epoch": 0.72,
"eval_exact_match": 73.1693472090823,
"eval_f1": 83.02894145240745,
"step": 2000
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8838860988616943,
"epoch": 0.73,
"learning_rate": 4.349837368991688e-05,
"loss": 1.9315,
"step": 2010,
"task_loss": 1.5416061878204346
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8987743854522705,
"epoch": 0.73,
"learning_rate": 4.371521503433322e-05,
"loss": 1.9744,
"step": 2020,
"task_loss": 1.4941067695617676
},
{
"compression_loss": 0.0,
"distillation_loss": 1.972581386566162,
"epoch": 0.73,
"learning_rate": 4.393205637874955e-05,
"loss": 1.8702,
"step": 2030,
"task_loss": 1.1873316764831543
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1103854179382324,
"epoch": 0.74,
"learning_rate": 4.414889772316588e-05,
"loss": 1.7989,
"step": 2040,
"task_loss": 1.9999923706054688
},
{
"compression_loss": 0.0,
"distillation_loss": 2.1972813606262207,
"epoch": 0.74,
"learning_rate": 4.436573906758222e-05,
"loss": 2.0754,
"step": 2050,
"task_loss": 1.3373115062713623
},
{
"compression_loss": 0.0,
"distillation_loss": 1.598461627960205,
"epoch": 0.74,
"learning_rate": 4.4582580411998554e-05,
"loss": 1.9482,
"step": 2060,
"task_loss": 0.7865440845489502
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3904833793640137,
"epoch": 0.75,
"learning_rate": 4.479942175641489e-05,
"loss": 1.9746,
"step": 2070,
"task_loss": 1.9388031959533691
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6508259773254395,
"epoch": 0.75,
"learning_rate": 4.5016263100831226e-05,
"loss": 1.9099,
"step": 2080,
"task_loss": 1.0399653911590576
},
{
"compression_loss": 0.0,
"distillation_loss": 1.772383451461792,
"epoch": 0.76,
"learning_rate": 4.523310444524756e-05,
"loss": 1.7396,
"step": 2090,
"task_loss": 0.9913707375526428
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4372773170471191,
"epoch": 0.76,
"learning_rate": 4.54499457896639e-05,
"loss": 1.6997,
"step": 2100,
"task_loss": 0.9053939580917358
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8861255645751953,
"epoch": 0.76,
"learning_rate": 4.566678713408023e-05,
"loss": 1.9588,
"step": 2110,
"task_loss": 1.1382219791412354
},
{
"compression_loss": 0.0,
"distillation_loss": 1.492857575416565,
"epoch": 0.77,
"learning_rate": 4.588362847849657e-05,
"loss": 2.0162,
"step": 2120,
"task_loss": 1.0419156551361084
},
{
"compression_loss": 0.0,
"distillation_loss": 2.141202926635742,
"epoch": 0.77,
"learning_rate": 4.6100469822912903e-05,
"loss": 1.9327,
"step": 2130,
"task_loss": 1.0418000221252441
},
{
"compression_loss": 0.0,
"distillation_loss": 1.920304536819458,
"epoch": 0.77,
"learning_rate": 4.631731116732924e-05,
"loss": 1.8879,
"step": 2140,
"task_loss": 1.1978967189788818
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5354628562927246,
"epoch": 0.78,
"learning_rate": 4.6534152511745575e-05,
"loss": 1.7451,
"step": 2150,
"task_loss": 0.8436750173568726
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4595377445220947,
"epoch": 0.78,
"learning_rate": 4.675099385616191e-05,
"loss": 1.7424,
"step": 2160,
"task_loss": 1.038141131401062
},
{
"compression_loss": 0.0,
"distillation_loss": 2.34226393699646,
"epoch": 0.78,
"learning_rate": 4.696783520057824e-05,
"loss": 1.7037,
"step": 2170,
"task_loss": 1.9839649200439453
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8102070093154907,
"epoch": 0.79,
"learning_rate": 4.718467654499458e-05,
"loss": 1.7556,
"step": 2180,
"task_loss": 1.7777656316757202
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5364153385162354,
"epoch": 0.79,
"learning_rate": 4.740151788941091e-05,
"loss": 1.8958,
"step": 2190,
"task_loss": 0.9634038805961609
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3901851177215576,
"epoch": 0.8,
"learning_rate": 4.761835923382725e-05,
"loss": 1.8381,
"step": 2200,
"task_loss": 1.4805151224136353
},
{
"compression_loss": 0.0,
"distillation_loss": 1.725096344947815,
"epoch": 0.8,
"learning_rate": 4.783520057824358e-05,
"loss": 1.6794,
"step": 2210,
"task_loss": 1.4682140350341797
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2090702056884766,
"epoch": 0.8,
"learning_rate": 4.8052041922659924e-05,
"loss": 1.9593,
"step": 2220,
"task_loss": 1.230177402496338
},
{
"compression_loss": 0.0,
"distillation_loss": 2.041393756866455,
"epoch": 0.81,
"learning_rate": 4.826888326707626e-05,
"loss": 1.809,
"step": 2230,
"task_loss": 1.3010313510894775
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7629666328430176,
"epoch": 0.81,
"learning_rate": 4.8485724611492595e-05,
"loss": 1.9261,
"step": 2240,
"task_loss": 0.9947839379310608
},
{
"compression_loss": 0.0,
"distillation_loss": 2.264925003051758,
"epoch": 0.81,
"learning_rate": 4.870256595590893e-05,
"loss": 1.902,
"step": 2250,
"task_loss": 1.4164844751358032
},
{
"epoch": 0.81,
"eval_exact_match": 74.87228003784296,
"eval_f1": 84.24213227343729,
"step": 2250
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6021579504013062,
"epoch": 0.82,
"learning_rate": 4.8919407300325266e-05,
"loss": 1.7243,
"step": 2260,
"task_loss": 1.630518913269043
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2044320106506348,
"epoch": 0.82,
"learning_rate": 4.91362486447416e-05,
"loss": 1.7822,
"step": 2270,
"task_loss": 1.8210062980651855
},
{
"compression_loss": 0.0,
"distillation_loss": 1.79202401638031,
"epoch": 0.82,
"learning_rate": 4.935308998915793e-05,
"loss": 1.7531,
"step": 2280,
"task_loss": 1.189091444015503
},
{
"compression_loss": 0.0,
"distillation_loss": 2.06649112701416,
"epoch": 0.83,
"learning_rate": 4.956993133357427e-05,
"loss": 1.844,
"step": 2290,
"task_loss": 1.1661924123764038
},
{
"compression_loss": 0.0,
"distillation_loss": 1.682025671005249,
"epoch": 0.83,
"learning_rate": 4.97867726779906e-05,
"loss": 1.7941,
"step": 2300,
"task_loss": 1.0544310808181763
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3249014616012573,
"epoch": 0.83,
"learning_rate": 5.0003614022406944e-05,
"loss": 1.7689,
"step": 2310,
"task_loss": 0.8618177771568298
},
{
"compression_loss": 0.0,
"distillation_loss": 2.0047874450683594,
"epoch": 0.84,
"learning_rate": 5.022045536682327e-05,
"loss": 1.6503,
"step": 2320,
"task_loss": 1.8260812759399414
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6946525573730469,
"epoch": 0.84,
"learning_rate": 5.0437296711239615e-05,
"loss": 1.6434,
"step": 2330,
"task_loss": 0.9825450778007507
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3918914794921875,
"epoch": 0.85,
"learning_rate": 5.0654138055655944e-05,
"loss": 1.7978,
"step": 2340,
"task_loss": 1.3643735647201538
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7238832712173462,
"epoch": 0.85,
"learning_rate": 5.0870979400072286e-05,
"loss": 1.7657,
"step": 2350,
"task_loss": 1.165932536125183
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7872267961502075,
"epoch": 0.85,
"learning_rate": 5.1087820744488615e-05,
"loss": 1.7084,
"step": 2360,
"task_loss": 1.5586845874786377
},
{
"compression_loss": 0.0,
"distillation_loss": 2.0670828819274902,
"epoch": 0.86,
"learning_rate": 5.130466208890496e-05,
"loss": 1.7082,
"step": 2370,
"task_loss": 1.581449270248413
},
{
"compression_loss": 0.0,
"distillation_loss": 1.378608226776123,
"epoch": 0.86,
"learning_rate": 5.1521503433321286e-05,
"loss": 1.755,
"step": 2380,
"task_loss": 0.980684757232666
},
{
"compression_loss": 0.0,
"distillation_loss": 2.0133185386657715,
"epoch": 0.86,
"learning_rate": 5.173834477773763e-05,
"loss": 1.8557,
"step": 2390,
"task_loss": 1.6304588317871094
},
{
"compression_loss": 0.0,
"distillation_loss": 1.729528546333313,
"epoch": 0.87,
"learning_rate": 5.195518612215396e-05,
"loss": 1.7122,
"step": 2400,
"task_loss": 1.540935754776001
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3772521018981934,
"epoch": 0.87,
"learning_rate": 5.217202746657029e-05,
"loss": 1.7996,
"step": 2410,
"task_loss": 0.889678955078125
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8892799615859985,
"epoch": 0.87,
"learning_rate": 5.238886881098663e-05,
"loss": 1.8532,
"step": 2420,
"task_loss": 1.5641158819198608
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5366939306259155,
"epoch": 0.88,
"learning_rate": 5.2605710155402964e-05,
"loss": 1.5529,
"step": 2430,
"task_loss": 0.9511038661003113
},
{
"compression_loss": 0.0,
"distillation_loss": 3.0125789642333984,
"epoch": 0.88,
"learning_rate": 5.28225514998193e-05,
"loss": 1.7891,
"step": 2440,
"task_loss": 2.29156756401062
},
{
"compression_loss": 0.0,
"distillation_loss": 2.3766469955444336,
"epoch": 0.89,
"learning_rate": 5.3039392844235635e-05,
"loss": 1.6349,
"step": 2450,
"task_loss": 1.3916804790496826
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5166912078857422,
"epoch": 0.89,
"learning_rate": 5.325623418865197e-05,
"loss": 1.7089,
"step": 2460,
"task_loss": 1.1395492553710938
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4228307008743286,
"epoch": 0.89,
"learning_rate": 5.3473075533068306e-05,
"loss": 1.4976,
"step": 2470,
"task_loss": 1.0344961881637573
},
{
"compression_loss": 0.0,
"distillation_loss": 2.241983413696289,
"epoch": 0.9,
"learning_rate": 5.368991687748464e-05,
"loss": 1.5189,
"step": 2480,
"task_loss": 1.3167787790298462
},
{
"compression_loss": 0.0,
"distillation_loss": 1.9524130821228027,
"epoch": 0.9,
"learning_rate": 5.390675822190098e-05,
"loss": 1.6949,
"step": 2490,
"task_loss": 1.677308201789856
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7088284492492676,
"epoch": 0.9,
"learning_rate": 5.412359956631731e-05,
"loss": 1.7772,
"step": 2500,
"task_loss": 1.0601410865783691
},
{
"epoch": 0.9,
"eval_exact_match": 74.76821192052981,
"eval_f1": 84.28227498844669,
"step": 2500
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7284023761749268,
"epoch": 0.91,
"learning_rate": 5.434044091073365e-05,
"loss": 1.7942,
"step": 2510,
"task_loss": 1.2665657997131348
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8622101545333862,
"epoch": 0.91,
"learning_rate": 5.455728225514998e-05,
"loss": 1.6878,
"step": 2520,
"task_loss": 1.2940951585769653
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3830608129501343,
"epoch": 0.91,
"learning_rate": 5.477412359956632e-05,
"loss": 1.628,
"step": 2530,
"task_loss": 1.0994198322296143
},
{
"compression_loss": 0.0,
"distillation_loss": 2.014238119125366,
"epoch": 0.92,
"learning_rate": 5.499096494398265e-05,
"loss": 1.5324,
"step": 2540,
"task_loss": 1.940596103668213
},
{
"compression_loss": 0.0,
"distillation_loss": 2.274725914001465,
"epoch": 0.92,
"learning_rate": 5.520780628839899e-05,
"loss": 1.6801,
"step": 2550,
"task_loss": 1.9991852045059204
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5515773296356201,
"epoch": 0.93,
"learning_rate": 5.542464763281532e-05,
"loss": 1.649,
"step": 2560,
"task_loss": 1.3755850791931152
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5370211601257324,
"epoch": 0.93,
"learning_rate": 5.564148897723166e-05,
"loss": 1.6046,
"step": 2570,
"task_loss": 1.4268473386764526
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6613398790359497,
"epoch": 0.93,
"learning_rate": 5.585833032164799e-05,
"loss": 1.5996,
"step": 2580,
"task_loss": 0.9327517151832581
},
{
"compression_loss": 0.0,
"distillation_loss": 1.47848641872406,
"epoch": 0.94,
"learning_rate": 5.607517166606433e-05,
"loss": 1.713,
"step": 2590,
"task_loss": 0.9023648500442505
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7783399820327759,
"epoch": 0.94,
"learning_rate": 5.629201301048067e-05,
"loss": 1.6175,
"step": 2600,
"task_loss": 1.1293476819992065
},
{
"compression_loss": 0.0,
"distillation_loss": 1.9461766481399536,
"epoch": 0.94,
"learning_rate": 5.6508854354897004e-05,
"loss": 1.6266,
"step": 2610,
"task_loss": 1.0467326641082764
},
{
"compression_loss": 0.0,
"distillation_loss": 1.567404866218567,
"epoch": 0.95,
"learning_rate": 5.672569569931334e-05,
"loss": 1.8742,
"step": 2620,
"task_loss": 1.0961357355117798
},
{
"compression_loss": 0.0,
"distillation_loss": 1.381565809249878,
"epoch": 0.95,
"learning_rate": 5.6942537043729675e-05,
"loss": 1.6772,
"step": 2630,
"task_loss": 1.4862456321716309
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3669390678405762,
"epoch": 0.95,
"learning_rate": 5.715937838814601e-05,
"loss": 1.6426,
"step": 2640,
"task_loss": 1.7144615650177002
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4433438777923584,
"epoch": 0.96,
"learning_rate": 5.737621973256234e-05,
"loss": 1.5835,
"step": 2650,
"task_loss": 1.1417357921600342
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4862744808197021,
"epoch": 0.96,
"learning_rate": 5.759306107697868e-05,
"loss": 1.7158,
"step": 2660,
"task_loss": 0.9948188066482544
},
{
"compression_loss": 0.0,
"distillation_loss": 1.210496187210083,
"epoch": 0.96,
"learning_rate": 5.780990242139501e-05,
"loss": 1.796,
"step": 2670,
"task_loss": 1.136523723602295
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4428620338439941,
"epoch": 0.97,
"learning_rate": 5.802674376581135e-05,
"loss": 1.632,
"step": 2680,
"task_loss": 0.7866153120994568
},
{
"compression_loss": 0.0,
"distillation_loss": 1.05281662940979,
"epoch": 0.97,
"learning_rate": 5.824358511022768e-05,
"loss": 1.5928,
"step": 2690,
"task_loss": 0.7293530106544495
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4683610200881958,
"epoch": 0.98,
"learning_rate": 5.8460426454644024e-05,
"loss": 1.4844,
"step": 2700,
"task_loss": 0.6756585836410522
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5305559635162354,
"epoch": 0.98,
"learning_rate": 5.867726779906035e-05,
"loss": 1.5709,
"step": 2710,
"task_loss": 1.3757680654525757
},
{
"compression_loss": 0.0,
"distillation_loss": 1.54231595993042,
"epoch": 0.98,
"learning_rate": 5.8894109143476696e-05,
"loss": 1.5264,
"step": 2720,
"task_loss": 1.2477138042449951
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7188405990600586,
"epoch": 0.99,
"learning_rate": 5.9110950487893024e-05,
"loss": 1.6459,
"step": 2730,
"task_loss": 1.166077971458435
},
{
"compression_loss": 0.0,
"distillation_loss": 2.8873350620269775,
"epoch": 0.99,
"learning_rate": 5.932779183230937e-05,
"loss": 1.7529,
"step": 2740,
"task_loss": 1.92569899559021
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3297995328903198,
"epoch": 0.99,
"learning_rate": 5.9544633176725695e-05,
"loss": 1.3638,
"step": 2750,
"task_loss": 1.0652023553848267
},
{
"epoch": 0.99,
"eval_exact_match": 76.26300851466415,
"eval_f1": 85.48165328587461,
"step": 2750
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8641085624694824,
"epoch": 1.0,
"learning_rate": 5.976147452114203e-05,
"loss": 1.628,
"step": 2760,
"task_loss": 1.1830246448516846
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2197985649108887,
"epoch": 1.0,
"learning_rate": 5.9978315865558367e-05,
"loss": 1.4132,
"step": 2770,
"task_loss": 1.025228500366211
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6511722803115845,
"epoch": 1.0,
"learning_rate": 5.999995649340457e-05,
"loss": 1.4993,
"step": 2780,
"task_loss": 1.00981605052948
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4045798778533936,
"epoch": 1.01,
"learning_rate": 5.9999806100397186e-05,
"loss": 1.2535,
"step": 2790,
"task_loss": 0.8313103318214417
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8121707439422607,
"epoch": 1.01,
"learning_rate": 5.999954828439778e-05,
"loss": 1.4897,
"step": 2800,
"task_loss": 1.642676830291748
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6657578945159912,
"epoch": 1.02,
"learning_rate": 5.999918304632955e-05,
"loss": 1.5951,
"step": 2810,
"task_loss": 1.2371957302093506
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5906226634979248,
"epoch": 1.02,
"learning_rate": 5.999871038750032e-05,
"loss": 1.4819,
"step": 2820,
"task_loss": 1.2596104145050049
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4319016933441162,
"epoch": 1.02,
"learning_rate": 5.999813030960259e-05,
"loss": 1.4997,
"step": 2830,
"task_loss": 1.103452205657959
},
{
"compression_loss": 0.0,
"distillation_loss": 1.490473985671997,
"epoch": 1.03,
"learning_rate": 5.99974428147135e-05,
"loss": 1.496,
"step": 2840,
"task_loss": 1.228161096572876
},
{
"compression_loss": 0.0,
"distillation_loss": 1.737858772277832,
"epoch": 1.03,
"learning_rate": 5.999664790529482e-05,
"loss": 1.3197,
"step": 2850,
"task_loss": 1.2385176420211792
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8279550075531006,
"epoch": 1.03,
"learning_rate": 5.9995745584192954e-05,
"loss": 1.5408,
"step": 2860,
"task_loss": 1.2323086261749268
},
{
"compression_loss": 0.0,
"distillation_loss": 2.0422871112823486,
"epoch": 1.04,
"learning_rate": 5.9994735854638916e-05,
"loss": 1.5051,
"step": 2870,
"task_loss": 1.5764563083648682
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6473441123962402,
"epoch": 1.04,
"learning_rate": 5.999361872024835e-05,
"loss": 1.431,
"step": 2880,
"task_loss": 1.0629370212554932
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0594135522842407,
"epoch": 1.04,
"learning_rate": 5.9992394185021474e-05,
"loss": 1.3824,
"step": 2890,
"task_loss": 0.8771895170211792
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4817508459091187,
"epoch": 1.05,
"learning_rate": 5.999106225334308e-05,
"loss": 1.3694,
"step": 2900,
"task_loss": 0.8263894319534302
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1033601760864258,
"epoch": 1.05,
"learning_rate": 5.998962292998255e-05,
"loss": 1.3645,
"step": 2910,
"task_loss": 1.4630831480026245
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2878718376159668,
"epoch": 1.06,
"learning_rate": 5.99880762200938e-05,
"loss": 1.4124,
"step": 2920,
"task_loss": 0.641461968421936
},
{
"compression_loss": 0.0,
"distillation_loss": 2.33697772026062,
"epoch": 1.06,
"learning_rate": 5.9986422129215255e-05,
"loss": 1.5289,
"step": 2930,
"task_loss": 1.9020227193832397
},
{
"compression_loss": 0.0,
"distillation_loss": 1.129021167755127,
"epoch": 1.06,
"learning_rate": 5.998466066326988e-05,
"loss": 1.4724,
"step": 2940,
"task_loss": 1.0405011177062988
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0331007242202759,
"epoch": 1.07,
"learning_rate": 5.998279182856511e-05,
"loss": 1.3023,
"step": 2950,
"task_loss": 0.7486757040023804
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3960323333740234,
"epoch": 1.07,
"learning_rate": 5.9980815631792844e-05,
"loss": 1.483,
"step": 2960,
"task_loss": 1.3152785301208496
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7268671989440918,
"epoch": 1.07,
"learning_rate": 5.997873208002943e-05,
"loss": 1.4653,
"step": 2970,
"task_loss": 1.341980218887329
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3513455390930176,
"epoch": 1.08,
"learning_rate": 5.997654118073564e-05,
"loss": 1.4756,
"step": 2980,
"task_loss": 1.1631968021392822
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6115731000900269,
"epoch": 1.08,
"learning_rate": 5.99742429417566e-05,
"loss": 1.4344,
"step": 2990,
"task_loss": 0.9157006740570068
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3850129842758179,
"epoch": 1.08,
"learning_rate": 5.997183737132184e-05,
"loss": 1.553,
"step": 3000,
"task_loss": 1.205183744430542
},
{
"epoch": 1.08,
"eval_exact_match": 75.96972563859981,
"eval_f1": 85.09594063703909,
"step": 3000
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6631824970245361,
"epoch": 1.09,
"learning_rate": 5.99693244780452e-05,
"loss": 1.4326,
"step": 3010,
"task_loss": 1.2684969902038574
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7456169128417969,
"epoch": 1.09,
"learning_rate": 5.996670427092481e-05,
"loss": 1.4876,
"step": 3020,
"task_loss": 1.0792250633239746
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8971850872039795,
"epoch": 1.1,
"learning_rate": 5.996397675934309e-05,
"loss": 1.6011,
"step": 3030,
"task_loss": 1.1246172189712524
},
{
"compression_loss": 0.0,
"distillation_loss": 1.48433518409729,
"epoch": 1.1,
"learning_rate": 5.996114195306668e-05,
"loss": 1.4407,
"step": 3040,
"task_loss": 1.1288623809814453
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5189406871795654,
"epoch": 1.1,
"learning_rate": 5.995819986224643e-05,
"loss": 1.6002,
"step": 3050,
"task_loss": 1.3538494110107422
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7961808443069458,
"epoch": 1.11,
"learning_rate": 5.995515049741734e-05,
"loss": 1.5678,
"step": 3060,
"task_loss": 1.821069598197937
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3772679567337036,
"epoch": 1.11,
"learning_rate": 5.995199386949855e-05,
"loss": 1.4492,
"step": 3070,
"task_loss": 1.3162784576416016
},
{
"compression_loss": 0.0,
"distillation_loss": 1.128143548965454,
"epoch": 1.11,
"learning_rate": 5.994872998979327e-05,
"loss": 1.4028,
"step": 3080,
"task_loss": 1.0793228149414062
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6217098236083984,
"epoch": 1.12,
"learning_rate": 5.9945358869988796e-05,
"loss": 1.5213,
"step": 3090,
"task_loss": 1.173330307006836
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3088253736495972,
"epoch": 1.12,
"learning_rate": 5.994188052215636e-05,
"loss": 1.3198,
"step": 3100,
"task_loss": 0.9987668395042419
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6880836486816406,
"epoch": 1.12,
"learning_rate": 5.993829495875121e-05,
"loss": 1.3887,
"step": 3110,
"task_loss": 1.2901586294174194
},
{
"compression_loss": 0.0,
"distillation_loss": 1.024364948272705,
"epoch": 1.13,
"learning_rate": 5.9934602192612506e-05,
"loss": 1.2978,
"step": 3120,
"task_loss": 0.765283465385437
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4316623210906982,
"epoch": 1.13,
"learning_rate": 5.9930802236963245e-05,
"loss": 1.5247,
"step": 3130,
"task_loss": 0.8690903186798096
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6122153997421265,
"epoch": 1.13,
"learning_rate": 5.992689510541028e-05,
"loss": 1.42,
"step": 3140,
"task_loss": 1.2866846323013306
},
{
"compression_loss": 0.0,
"distillation_loss": 1.323101282119751,
"epoch": 1.14,
"learning_rate": 5.992288081194423e-05,
"loss": 1.3262,
"step": 3150,
"task_loss": 1.4218419790267944
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2536742687225342,
"epoch": 1.14,
"learning_rate": 5.991875937093943e-05,
"loss": 1.4379,
"step": 3160,
"task_loss": 1.2389776706695557
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0673775672912598,
"epoch": 1.15,
"learning_rate": 5.991453079715389e-05,
"loss": 1.3706,
"step": 3170,
"task_loss": 0.973718523979187
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4817243814468384,
"epoch": 1.15,
"learning_rate": 5.991019510572925e-05,
"loss": 1.3935,
"step": 3180,
"task_loss": 0.9717172384262085
},
{
"compression_loss": 0.0,
"distillation_loss": 1.444387435913086,
"epoch": 1.15,
"learning_rate": 5.990575231219071e-05,
"loss": 1.3291,
"step": 3190,
"task_loss": 1.6736479997634888
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3083770275115967,
"epoch": 1.16,
"learning_rate": 5.9901202432446966e-05,
"loss": 1.1301,
"step": 3200,
"task_loss": 1.0902491807937622
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2770934104919434,
"epoch": 1.16,
"learning_rate": 5.989654548279019e-05,
"loss": 1.3383,
"step": 3210,
"task_loss": 0.8913994431495667
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9612149000167847,
"epoch": 1.16,
"learning_rate": 5.989178147989594e-05,
"loss": 1.2876,
"step": 3220,
"task_loss": 0.866273045539856
},
{
"compression_loss": 0.0,
"distillation_loss": 1.750402569770813,
"epoch": 1.17,
"learning_rate": 5.988691044082309e-05,
"loss": 1.3676,
"step": 3230,
"task_loss": 1.519735336303711
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3474881649017334,
"epoch": 1.17,
"learning_rate": 5.988193238301383e-05,
"loss": 1.1273,
"step": 3240,
"task_loss": 0.7416272163391113
},
{
"compression_loss": 0.0,
"distillation_loss": 1.463036060333252,
"epoch": 1.17,
"learning_rate": 5.987684732429352e-05,
"loss": 1.3382,
"step": 3250,
"task_loss": 1.3737218379974365
},
{
"epoch": 1.17,
"eval_exact_match": 77.60643330179754,
"eval_f1": 86.28265990982867,
"step": 3250
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9102663397789001,
"epoch": 1.18,
"learning_rate": 5.987165528287069e-05,
"loss": 1.3387,
"step": 3260,
"task_loss": 0.9284595251083374
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6090664863586426,
"epoch": 1.18,
"learning_rate": 5.9866356277336964e-05,
"loss": 1.293,
"step": 3270,
"task_loss": 1.4329043626785278
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5263841152191162,
"epoch": 1.19,
"learning_rate": 5.9860950326666935e-05,
"loss": 1.4872,
"step": 3280,
"task_loss": 1.0852687358856201
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2740449905395508,
"epoch": 1.19,
"learning_rate": 5.985543745021821e-05,
"loss": 1.3863,
"step": 3290,
"task_loss": 1.306997537612915
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2338653802871704,
"epoch": 1.19,
"learning_rate": 5.98498176677312e-05,
"loss": 1.2642,
"step": 3300,
"task_loss": 0.6805293560028076
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0945351123809814,
"epoch": 1.2,
"learning_rate": 5.98440909993292e-05,
"loss": 1.2649,
"step": 3310,
"task_loss": 0.9938749074935913
},
{
"compression_loss": 0.0,
"distillation_loss": 2.241112232208252,
"epoch": 1.2,
"learning_rate": 5.983825746551817e-05,
"loss": 1.4417,
"step": 3320,
"task_loss": 2.230130910873413
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2390515804290771,
"epoch": 1.2,
"learning_rate": 5.9832317087186795e-05,
"loss": 1.368,
"step": 3330,
"task_loss": 0.8296461701393127
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5527942180633545,
"epoch": 1.21,
"learning_rate": 5.982626988560631e-05,
"loss": 1.4451,
"step": 3340,
"task_loss": 1.1470730304718018
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8781901001930237,
"epoch": 1.21,
"learning_rate": 5.9820115882430476e-05,
"loss": 1.3569,
"step": 3350,
"task_loss": 0.6853946447372437
},
{
"compression_loss": 0.0,
"distillation_loss": 1.358176350593567,
"epoch": 1.21,
"learning_rate": 5.981385509969547e-05,
"loss": 1.2828,
"step": 3360,
"task_loss": 1.4298112392425537
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2665464878082275,
"epoch": 1.22,
"learning_rate": 5.980748755981984e-05,
"loss": 1.3211,
"step": 3370,
"task_loss": 1.4579181671142578
},
{
"compression_loss": 0.0,
"distillation_loss": 0.965965211391449,
"epoch": 1.22,
"learning_rate": 5.980101328560442e-05,
"loss": 1.3321,
"step": 3380,
"task_loss": 0.7342075109481812
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5226565599441528,
"epoch": 1.23,
"learning_rate": 5.979443230023221e-05,
"loss": 1.4229,
"step": 3390,
"task_loss": 1.4543688297271729
},
{
"compression_loss": 0.0,
"distillation_loss": 1.332014799118042,
"epoch": 1.23,
"learning_rate": 5.978774462726834e-05,
"loss": 1.3364,
"step": 3400,
"task_loss": 0.9676311612129211
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0038352012634277,
"epoch": 1.23,
"learning_rate": 5.9780950290659965e-05,
"loss": 1.2919,
"step": 3410,
"task_loss": 0.9453527331352234
},
{
"compression_loss": 0.0,
"distillation_loss": 1.033329725265503,
"epoch": 1.24,
"learning_rate": 5.977404931473615e-05,
"loss": 1.2794,
"step": 3420,
"task_loss": 0.5825457572937012
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0678160190582275,
"epoch": 1.24,
"learning_rate": 5.976704172420787e-05,
"loss": 1.2897,
"step": 3430,
"task_loss": 1.085493803024292
},
{
"compression_loss": 0.0,
"distillation_loss": 1.299585223197937,
"epoch": 1.24,
"learning_rate": 5.975992754416782e-05,
"loss": 1.2425,
"step": 3440,
"task_loss": 0.9612342119216919
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2164323329925537,
"epoch": 1.25,
"learning_rate": 5.975270680009036e-05,
"loss": 1.3221,
"step": 3450,
"task_loss": 1.375262975692749
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5335781574249268,
"epoch": 1.25,
"learning_rate": 5.974537951783148e-05,
"loss": 1.4045,
"step": 3460,
"task_loss": 1.7295887470245361
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2506041526794434,
"epoch": 1.25,
"learning_rate": 5.9737945723628635e-05,
"loss": 1.2224,
"step": 3470,
"task_loss": 1.2525482177734375
},
{
"compression_loss": 0.0,
"distillation_loss": 2.2599070072174072,
"epoch": 1.26,
"learning_rate": 5.973040544410066e-05,
"loss": 1.3498,
"step": 3480,
"task_loss": 1.7536072731018066
},
{
"compression_loss": 0.0,
"distillation_loss": 1.718400239944458,
"epoch": 1.26,
"learning_rate": 5.972275870624773e-05,
"loss": 1.2841,
"step": 3490,
"task_loss": 1.339259147644043
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5253331661224365,
"epoch": 1.26,
"learning_rate": 5.971500553745119e-05,
"loss": 1.4498,
"step": 3500,
"task_loss": 0.9578819274902344
},
{
"epoch": 1.26,
"eval_exact_match": 78.38221381267739,
"eval_f1": 86.75896485683346,
"step": 3500
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6366232633590698,
"epoch": 1.27,
"learning_rate": 5.9707145965473516e-05,
"loss": 1.4598,
"step": 3510,
"task_loss": 1.8735042810440063
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0762263536453247,
"epoch": 1.27,
"learning_rate": 5.969918001845817e-05,
"loss": 1.2466,
"step": 3520,
"task_loss": 0.8542821407318115
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3200879096984863,
"epoch": 1.28,
"learning_rate": 5.969110772492954e-05,
"loss": 1.269,
"step": 3530,
"task_loss": 1.0112990140914917
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1145355701446533,
"epoch": 1.28,
"learning_rate": 5.968292911379281e-05,
"loss": 1.4295,
"step": 3540,
"task_loss": 0.8630874752998352
},
{
"compression_loss": 0.0,
"distillation_loss": 1.371903657913208,
"epoch": 1.28,
"learning_rate": 5.967464421433385e-05,
"loss": 1.3867,
"step": 3550,
"task_loss": 1.1606991291046143
},
{
"compression_loss": 0.0,
"distillation_loss": 1.486452341079712,
"epoch": 1.29,
"learning_rate": 5.966625305621916e-05,
"loss": 1.2152,
"step": 3560,
"task_loss": 1.3023685216903687
},
{
"compression_loss": 0.0,
"distillation_loss": 1.478879690170288,
"epoch": 1.29,
"learning_rate": 5.965775566949571e-05,
"loss": 1.238,
"step": 3570,
"task_loss": 1.165961503982544
},
{
"compression_loss": 0.0,
"distillation_loss": 1.483465552330017,
"epoch": 1.29,
"learning_rate": 5.964915208459085e-05,
"loss": 1.3574,
"step": 3580,
"task_loss": 1.0961742401123047
},
{
"compression_loss": 0.0,
"distillation_loss": 1.294953465461731,
"epoch": 1.3,
"learning_rate": 5.9640442332312195e-05,
"loss": 1.2928,
"step": 3590,
"task_loss": 0.8126479387283325
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0170869827270508,
"epoch": 1.3,
"learning_rate": 5.963162644384755e-05,
"loss": 1.1543,
"step": 3600,
"task_loss": 0.8312715291976929
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5762726068496704,
"epoch": 1.3,
"learning_rate": 5.9622704450764756e-05,
"loss": 1.2988,
"step": 3610,
"task_loss": 0.9494091272354126
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0313997268676758,
"epoch": 1.31,
"learning_rate": 5.9613676385011585e-05,
"loss": 1.1966,
"step": 3620,
"task_loss": 0.8011406660079956
},
{
"compression_loss": 0.0,
"distillation_loss": 1.594609260559082,
"epoch": 1.31,
"learning_rate": 5.960454227891564e-05,
"loss": 1.4918,
"step": 3630,
"task_loss": 1.3429150581359863
},
{
"compression_loss": 0.0,
"distillation_loss": 1.721564531326294,
"epoch": 1.32,
"learning_rate": 5.9595302165184246e-05,
"loss": 1.423,
"step": 3640,
"task_loss": 1.5580472946166992
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9121531248092651,
"epoch": 1.32,
"learning_rate": 5.9585956076904284e-05,
"loss": 1.3897,
"step": 3650,
"task_loss": 1.109339952468872
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6109825372695923,
"epoch": 1.32,
"learning_rate": 5.9576504047542156e-05,
"loss": 1.41,
"step": 3660,
"task_loss": 0.9962501525878906
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4419324398040771,
"epoch": 1.33,
"learning_rate": 5.956694611094356e-05,
"loss": 1.2896,
"step": 3670,
"task_loss": 0.6805934906005859
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3609169721603394,
"epoch": 1.33,
"learning_rate": 5.955728230133347e-05,
"loss": 1.2873,
"step": 3680,
"task_loss": 0.8458003997802734
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1782557964324951,
"epoch": 1.33,
"learning_rate": 5.954751265331597e-05,
"loss": 1.3991,
"step": 3690,
"task_loss": 1.114344835281372
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5857070684432983,
"epoch": 1.34,
"learning_rate": 5.9537637201874086e-05,
"loss": 1.2406,
"step": 3700,
"task_loss": 1.243434190750122
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4138120412826538,
"epoch": 1.34,
"learning_rate": 5.952765598236975e-05,
"loss": 1.3789,
"step": 3710,
"task_loss": 0.6327986717224121
},
{
"compression_loss": 0.0,
"distillation_loss": 1.529003620147705,
"epoch": 1.34,
"learning_rate": 5.95175690305436e-05,
"loss": 1.3643,
"step": 3720,
"task_loss": 1.2512716054916382
},
{
"compression_loss": 0.0,
"distillation_loss": 1.472851037979126,
"epoch": 1.35,
"learning_rate": 5.950737638251488e-05,
"loss": 1.311,
"step": 3730,
"task_loss": 1.6475411653518677
},
{
"compression_loss": 0.0,
"distillation_loss": 1.9043711423873901,
"epoch": 1.35,
"learning_rate": 5.9497078074781344e-05,
"loss": 1.3004,
"step": 3740,
"task_loss": 1.6194053888320923
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0727344751358032,
"epoch": 1.36,
"learning_rate": 5.948667414421904e-05,
"loss": 1.2777,
"step": 3750,
"task_loss": 1.061992883682251
},
{
"epoch": 1.36,
"eval_exact_match": 78.76064333017976,
"eval_f1": 87.07184243155801,
"step": 3750
},
{
"compression_loss": 0.0,
"distillation_loss": 1.449849247932434,
"epoch": 1.36,
"learning_rate": 5.947616462808226e-05,
"loss": 1.4524,
"step": 3760,
"task_loss": 0.9752452373504639
},
{
"compression_loss": 0.0,
"distillation_loss": 1.321598768234253,
"epoch": 1.36,
"learning_rate": 5.946554956400337e-05,
"loss": 1.3379,
"step": 3770,
"task_loss": 1.1972038745880127
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2405821084976196,
"epoch": 1.37,
"learning_rate": 5.945482898999269e-05,
"loss": 1.3715,
"step": 3780,
"task_loss": 1.1690441370010376
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1473963260650635,
"epoch": 1.37,
"learning_rate": 5.9444002944438315e-05,
"loss": 1.2458,
"step": 3790,
"task_loss": 0.898491382598877
},
{
"compression_loss": 0.0,
"distillation_loss": 1.310509443283081,
"epoch": 1.37,
"learning_rate": 5.943307146610606e-05,
"loss": 1.353,
"step": 3800,
"task_loss": 1.1752980947494507
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2511919736862183,
"epoch": 1.38,
"learning_rate": 5.942203459413925e-05,
"loss": 1.1887,
"step": 3810,
"task_loss": 1.156741738319397
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2756710052490234,
"epoch": 1.38,
"learning_rate": 5.941089236805858e-05,
"loss": 1.2036,
"step": 3820,
"task_loss": 0.9336721301078796
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8774323463439941,
"epoch": 1.38,
"learning_rate": 5.9399644827762026e-05,
"loss": 1.2673,
"step": 3830,
"task_loss": 1.782167673110962
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3417766094207764,
"epoch": 1.39,
"learning_rate": 5.938829201352467e-05,
"loss": 1.2705,
"step": 3840,
"task_loss": 1.7044801712036133
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4353686571121216,
"epoch": 1.39,
"learning_rate": 5.937683396599854e-05,
"loss": 1.1901,
"step": 3850,
"task_loss": 1.3189786672592163
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0781793594360352,
"epoch": 1.4,
"learning_rate": 5.9365270726212497e-05,
"loss": 1.2764,
"step": 3860,
"task_loss": 0.7336215376853943
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6718968152999878,
"epoch": 1.4,
"learning_rate": 5.935360233557207e-05,
"loss": 1.3173,
"step": 3870,
"task_loss": 1.3569270372390747
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3217222690582275,
"epoch": 1.4,
"learning_rate": 5.934182883585932e-05,
"loss": 1.3197,
"step": 3880,
"task_loss": 1.3436063528060913
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8721194267272949,
"epoch": 1.41,
"learning_rate": 5.9329950269232654e-05,
"loss": 1.2029,
"step": 3890,
"task_loss": 0.7747288942337036
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2571134567260742,
"epoch": 1.41,
"learning_rate": 5.9317966678226725e-05,
"loss": 1.4305,
"step": 3900,
"task_loss": 1.112687110900879
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2334290742874146,
"epoch": 1.41,
"learning_rate": 5.930587810575225e-05,
"loss": 1.3896,
"step": 3910,
"task_loss": 0.8130580186843872
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6533408164978027,
"epoch": 1.42,
"learning_rate": 5.929368459509586e-05,
"loss": 1.1132,
"step": 3920,
"task_loss": 1.5629518032073975
},
{
"compression_loss": 0.0,
"distillation_loss": 1.402529001235962,
"epoch": 1.42,
"learning_rate": 5.9281386189919965e-05,
"loss": 1.1492,
"step": 3930,
"task_loss": 1.2752315998077393
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1418523788452148,
"epoch": 1.42,
"learning_rate": 5.926898293426255e-05,
"loss": 1.2519,
"step": 3940,
"task_loss": 1.0082168579101562
},
{
"compression_loss": 0.0,
"distillation_loss": 1.589359164237976,
"epoch": 1.43,
"learning_rate": 5.925647487253707e-05,
"loss": 1.2353,
"step": 3950,
"task_loss": 1.2787137031555176
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0001301765441895,
"epoch": 1.43,
"learning_rate": 5.9243862049532264e-05,
"loss": 1.3728,
"step": 3960,
"task_loss": 0.9775519967079163
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7581260204315186,
"epoch": 1.43,
"learning_rate": 5.9231144510411994e-05,
"loss": 1.3053,
"step": 3970,
"task_loss": 1.2524868249893188
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1798157691955566,
"epoch": 1.44,
"learning_rate": 5.921832230071508e-05,
"loss": 1.3237,
"step": 3980,
"task_loss": 1.0549383163452148
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2880291938781738,
"epoch": 1.44,
"learning_rate": 5.9205395466355186e-05,
"loss": 1.2282,
"step": 3990,
"task_loss": 0.7487226128578186
},
{
"compression_loss": 0.0,
"distillation_loss": 1.314383625984192,
"epoch": 1.45,
"learning_rate": 5.9192364053620554e-05,
"loss": 1.3069,
"step": 4000,
"task_loss": 1.3157868385314941
},
{
"epoch": 1.45,
"eval_exact_match": 78.62819299905392,
"eval_f1": 87.15414215858682,
"step": 4000
},
{
"compression_loss": 0.0,
"distillation_loss": 0.793086051940918,
"epoch": 1.45,
"learning_rate": 5.917922810917394e-05,
"loss": 1.175,
"step": 4010,
"task_loss": 0.5701212286949158
},
{
"compression_loss": 0.0,
"distillation_loss": 1.295424461364746,
"epoch": 1.45,
"learning_rate": 5.91659876800524e-05,
"loss": 1.2627,
"step": 4020,
"task_loss": 0.9144801497459412
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0756990909576416,
"epoch": 1.46,
"learning_rate": 5.9152642813667135e-05,
"loss": 1.2696,
"step": 4030,
"task_loss": 0.8201438188552856
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3234689235687256,
"epoch": 1.46,
"learning_rate": 5.913919355780329e-05,
"loss": 1.3347,
"step": 4040,
"task_loss": 1.1706106662750244
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0304203033447266,
"epoch": 1.46,
"learning_rate": 5.912563996061981e-05,
"loss": 1.1509,
"step": 4050,
"task_loss": 1.0333361625671387
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8914575576782227,
"epoch": 1.47,
"learning_rate": 5.9111982070649294e-05,
"loss": 1.2647,
"step": 4060,
"task_loss": 1.0233019590377808
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5193976163864136,
"epoch": 1.47,
"learning_rate": 5.909821993679775e-05,
"loss": 1.336,
"step": 4070,
"task_loss": 1.1142679452896118
},
{
"compression_loss": 0.0,
"distillation_loss": 1.550922155380249,
"epoch": 1.47,
"learning_rate": 5.908435360834451e-05,
"loss": 1.2458,
"step": 4080,
"task_loss": 1.194461464881897
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4292857646942139,
"epoch": 1.48,
"learning_rate": 5.9070383134941953e-05,
"loss": 1.3929,
"step": 4090,
"task_loss": 1.3362990617752075
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2451667785644531,
"epoch": 1.48,
"learning_rate": 5.9056308566615434e-05,
"loss": 1.1498,
"step": 4100,
"task_loss": 0.7560177445411682
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4586573839187622,
"epoch": 1.49,
"learning_rate": 5.904212995376298e-05,
"loss": 1.1929,
"step": 4110,
"task_loss": 1.1912312507629395
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1983109712600708,
"epoch": 1.49,
"learning_rate": 5.9027847347155253e-05,
"loss": 1.1934,
"step": 4120,
"task_loss": 1.048568844795227
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7856746912002563,
"epoch": 1.49,
"learning_rate": 5.901346079793525e-05,
"loss": 1.25,
"step": 4130,
"task_loss": 1.6442539691925049
},
{
"compression_loss": 0.0,
"distillation_loss": 1.616405963897705,
"epoch": 1.5,
"learning_rate": 5.899897035761817e-05,
"loss": 1.2341,
"step": 4140,
"task_loss": 1.356785774230957
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2377030849456787,
"epoch": 1.5,
"learning_rate": 5.898437607809124e-05,
"loss": 1.1177,
"step": 4150,
"task_loss": 1.0740208625793457
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0152101516723633,
"epoch": 1.5,
"learning_rate": 5.896967801161349e-05,
"loss": 1.3281,
"step": 4160,
"task_loss": 1.027454137802124
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5116093158721924,
"epoch": 1.51,
"learning_rate": 5.895487621081562e-05,
"loss": 1.2888,
"step": 4170,
"task_loss": 1.2181144952774048
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1471505165100098,
"epoch": 1.51,
"learning_rate": 5.893997072869975e-05,
"loss": 1.1994,
"step": 4180,
"task_loss": 0.8226631879806519
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0915307998657227,
"epoch": 1.51,
"learning_rate": 5.892496161863928e-05,
"loss": 1.2187,
"step": 4190,
"task_loss": 0.5205323100090027
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2426114082336426,
"epoch": 1.52,
"learning_rate": 5.8909848934378674e-05,
"loss": 1.1759,
"step": 4200,
"task_loss": 1.330606460571289
},
{
"compression_loss": 0.0,
"distillation_loss": 0.944818913936615,
"epoch": 1.52,
"learning_rate": 5.889463273003328e-05,
"loss": 1.2345,
"step": 4210,
"task_loss": 0.6433451175689697
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1503543853759766,
"epoch": 1.53,
"learning_rate": 5.887931306008911e-05,
"loss": 1.241,
"step": 4220,
"task_loss": 1.847876787185669
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0494859218597412,
"epoch": 1.53,
"learning_rate": 5.8863889979402696e-05,
"loss": 1.2631,
"step": 4230,
"task_loss": 0.9382550716400146
},
{
"compression_loss": 0.0,
"distillation_loss": 1.224144458770752,
"epoch": 1.53,
"learning_rate": 5.8848363543200816e-05,
"loss": 1.2842,
"step": 4240,
"task_loss": 1.0627460479736328
},
{
"compression_loss": 0.0,
"distillation_loss": 1.124106764793396,
"epoch": 1.54,
"learning_rate": 5.88327338070804e-05,
"loss": 1.3314,
"step": 4250,
"task_loss": 1.1247596740722656
},
{
"epoch": 1.54,
"eval_exact_match": 78.56196783349101,
"eval_f1": 87.1287016330811,
"step": 4250
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6598260402679443,
"epoch": 1.54,
"learning_rate": 5.8817000827008224e-05,
"loss": 1.1831,
"step": 4260,
"task_loss": 1.4295686483383179
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6338021755218506,
"epoch": 1.54,
"learning_rate": 5.88011646593208e-05,
"loss": 1.3631,
"step": 4270,
"task_loss": 1.3802179098129272
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0921931266784668,
"epoch": 1.55,
"learning_rate": 5.878522536072409e-05,
"loss": 1.0914,
"step": 4280,
"task_loss": 1.2787001132965088
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1799564361572266,
"epoch": 1.55,
"learning_rate": 5.876918298829337e-05,
"loss": 1.1864,
"step": 4290,
"task_loss": 1.129196047782898
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8012198209762573,
"epoch": 1.55,
"learning_rate": 5.875303759947301e-05,
"loss": 1.1393,
"step": 4300,
"task_loss": 1.3329541683197021
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3091764450073242,
"epoch": 1.56,
"learning_rate": 5.873678925207624e-05,
"loss": 1.3255,
"step": 4310,
"task_loss": 1.0994157791137695
},
{
"compression_loss": 0.0,
"distillation_loss": 1.598827600479126,
"epoch": 1.56,
"learning_rate": 5.872043800428498e-05,
"loss": 1.1574,
"step": 4320,
"task_loss": 1.1182217597961426
},
{
"compression_loss": 0.0,
"distillation_loss": 1.304560899734497,
"epoch": 1.56,
"learning_rate": 5.870398391464961e-05,
"loss": 1.153,
"step": 4330,
"task_loss": 1.2183809280395508
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5170609951019287,
"epoch": 1.57,
"learning_rate": 5.868742704208875e-05,
"loss": 1.3349,
"step": 4340,
"task_loss": 1.345301628112793
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2407028675079346,
"epoch": 1.57,
"learning_rate": 5.867076744588908e-05,
"loss": 1.2039,
"step": 4350,
"task_loss": 1.1238722801208496
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2521941661834717,
"epoch": 1.58,
"learning_rate": 5.8654005185705114e-05,
"loss": 1.1578,
"step": 4360,
"task_loss": 0.9843517541885376
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4626522064208984,
"epoch": 1.58,
"learning_rate": 5.863714032155897e-05,
"loss": 1.2691,
"step": 4370,
"task_loss": 0.9148292541503906
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6399775743484497,
"epoch": 1.58,
"learning_rate": 5.8620172913840186e-05,
"loss": 1.2639,
"step": 4380,
"task_loss": 1.2807812690734863
},
{
"compression_loss": 0.0,
"distillation_loss": 1.63152277469635,
"epoch": 1.59,
"learning_rate": 5.860310302330548e-05,
"loss": 1.3362,
"step": 4390,
"task_loss": 1.241642713546753
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8616843223571777,
"epoch": 1.59,
"learning_rate": 5.8585930711078514e-05,
"loss": 1.2129,
"step": 4400,
"task_loss": 1.7467364072799683
},
{
"compression_loss": 0.0,
"distillation_loss": 1.506812334060669,
"epoch": 1.59,
"learning_rate": 5.856865603864975e-05,
"loss": 1.4104,
"step": 4410,
"task_loss": 1.2961748838424683
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5448143482208252,
"epoch": 1.6,
"learning_rate": 5.855127906787615e-05,
"loss": 1.2444,
"step": 4420,
"task_loss": 0.8679540157318115
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8945345878601074,
"epoch": 1.6,
"learning_rate": 5.853379986098098e-05,
"loss": 1.2738,
"step": 4430,
"task_loss": 1.0072073936462402
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8297909498214722,
"epoch": 1.6,
"learning_rate": 5.85162184805536e-05,
"loss": 1.3369,
"step": 4440,
"task_loss": 0.6960026025772095
},
{
"compression_loss": 0.0,
"distillation_loss": 1.204261064529419,
"epoch": 1.61,
"learning_rate": 5.849853498954926e-05,
"loss": 1.2531,
"step": 4450,
"task_loss": 0.7097033262252808
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0963704586029053,
"epoch": 1.61,
"learning_rate": 5.848074945128877e-05,
"loss": 1.1261,
"step": 4460,
"task_loss": 1.0379323959350586
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0468240976333618,
"epoch": 1.62,
"learning_rate": 5.846286192945845e-05,
"loss": 1.1079,
"step": 4470,
"task_loss": 0.7108166813850403
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8962523341178894,
"epoch": 1.62,
"learning_rate": 5.844487248810972e-05,
"loss": 1.1465,
"step": 4480,
"task_loss": 0.902863621711731
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2622666358947754,
"epoch": 1.62,
"learning_rate": 5.8426781191659e-05,
"loss": 1.1282,
"step": 4490,
"task_loss": 1.0960261821746826
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3841495513916016,
"epoch": 1.63,
"learning_rate": 5.840858810488741e-05,
"loss": 1.1508,
"step": 4500,
"task_loss": 1.3540650606155396
},
{
"epoch": 1.63,
"eval_exact_match": 79.4228949858089,
"eval_f1": 87.49554502449215,
"step": 4500
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1759483814239502,
"epoch": 1.63,
"learning_rate": 5.8390293292940554e-05,
"loss": 1.2905,
"step": 4510,
"task_loss": 1.0437816381454468
},
{
"compression_loss": 0.0,
"distillation_loss": 1.183558702468872,
"epoch": 1.63,
"learning_rate": 5.837189682132831e-05,
"loss": 1.1933,
"step": 4520,
"task_loss": 1.1283735036849976
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1170258522033691,
"epoch": 1.64,
"learning_rate": 5.8353398755924576e-05,
"loss": 1.2729,
"step": 4530,
"task_loss": 1.0031014680862427
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4635521173477173,
"epoch": 1.64,
"learning_rate": 5.833479916296704e-05,
"loss": 1.2865,
"step": 4540,
"task_loss": 1.0605723857879639
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9159049391746521,
"epoch": 1.64,
"learning_rate": 5.8316098109056905e-05,
"loss": 1.119,
"step": 4550,
"task_loss": 0.7583830952644348
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0998435020446777,
"epoch": 1.65,
"learning_rate": 5.829729566115874e-05,
"loss": 1.0501,
"step": 4560,
"task_loss": 0.7868894338607788
},
{
"compression_loss": 0.0,
"distillation_loss": 2.326512575149536,
"epoch": 1.65,
"learning_rate": 5.827839188660012e-05,
"loss": 1.5112,
"step": 4570,
"task_loss": 2.1364526748657227
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4276812076568604,
"epoch": 1.66,
"learning_rate": 5.825938685307151e-05,
"loss": 1.185,
"step": 4580,
"task_loss": 1.419746994972229
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2273736000061035,
"epoch": 1.66,
"learning_rate": 5.824028062862592e-05,
"loss": 1.0967,
"step": 4590,
"task_loss": 0.9421610832214355
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2494888305664062,
"epoch": 1.66,
"learning_rate": 5.822107328167873e-05,
"loss": 1.2568,
"step": 4600,
"task_loss": 1.162670612335205
},
{
"compression_loss": 0.0,
"distillation_loss": 1.387427568435669,
"epoch": 1.67,
"learning_rate": 5.8201764881007395e-05,
"loss": 1.2342,
"step": 4610,
"task_loss": 1.1487003564834595
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7944004535675049,
"epoch": 1.67,
"learning_rate": 5.8182355495751244e-05,
"loss": 1.0527,
"step": 4620,
"task_loss": 0.8616865873336792
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0871223211288452,
"epoch": 1.67,
"learning_rate": 5.8162845195411193e-05,
"loss": 1.0867,
"step": 4630,
"task_loss": 1.0131196975708008
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1689753532409668,
"epoch": 1.68,
"learning_rate": 5.814323404984954e-05,
"loss": 1.239,
"step": 4640,
"task_loss": 0.9322003126144409
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7448208928108215,
"epoch": 1.68,
"learning_rate": 5.8123522129289646e-05,
"loss": 1.0301,
"step": 4650,
"task_loss": 0.4830577075481415
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9654221534729004,
"epoch": 1.68,
"learning_rate": 5.8103709504315755e-05,
"loss": 1.1417,
"step": 4660,
"task_loss": 1.0902562141418457
},
{
"compression_loss": 0.0,
"distillation_loss": 1.245806097984314,
"epoch": 1.69,
"learning_rate": 5.808379624587272e-05,
"loss": 1.1587,
"step": 4670,
"task_loss": 1.5773122310638428
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8751307129859924,
"epoch": 1.69,
"learning_rate": 5.806378242526572e-05,
"loss": 1.2707,
"step": 4680,
"task_loss": 0.7810333967208862
},
{
"compression_loss": 0.0,
"distillation_loss": 0.968869686126709,
"epoch": 1.69,
"learning_rate": 5.804366811416004e-05,
"loss": 1.0442,
"step": 4690,
"task_loss": 0.7142177224159241
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0504038333892822,
"epoch": 1.7,
"learning_rate": 5.8023453384580784e-05,
"loss": 1.2519,
"step": 4700,
"task_loss": 0.9022510051727295
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1235523223876953,
"epoch": 1.7,
"learning_rate": 5.800313830891265e-05,
"loss": 1.3013,
"step": 4710,
"task_loss": 0.8178448677062988
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7393571138381958,
"epoch": 1.71,
"learning_rate": 5.798272295989965e-05,
"loss": 1.28,
"step": 4720,
"task_loss": 0.8073184490203857
},
{
"compression_loss": 0.0,
"distillation_loss": 1.7382944822311401,
"epoch": 1.71,
"learning_rate": 5.796220741064486e-05,
"loss": 1.4239,
"step": 4730,
"task_loss": 1.6766483783721924
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2721589803695679,
"epoch": 1.71,
"learning_rate": 5.794159173461013e-05,
"loss": 1.0153,
"step": 4740,
"task_loss": 1.1552908420562744
},
{
"compression_loss": 0.0,
"distillation_loss": 1.029242753982544,
"epoch": 1.72,
"learning_rate": 5.7920876005615866e-05,
"loss": 1.2561,
"step": 4750,
"task_loss": 1.1739318370819092
},
{
"epoch": 1.72,
"eval_exact_match": 79.47019867549669,
"eval_f1": 87.64686274053376,
"step": 4750
},
{
"compression_loss": 0.0,
"distillation_loss": 1.001348614692688,
"epoch": 1.72,
"learning_rate": 5.790006029784072e-05,
"loss": 1.0992,
"step": 4760,
"task_loss": 0.8562111258506775
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5794899463653564,
"epoch": 1.72,
"learning_rate": 5.787914468582138e-05,
"loss": 1.1878,
"step": 4770,
"task_loss": 1.2599658966064453
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5452587604522705,
"epoch": 1.73,
"learning_rate": 5.7858129244452245e-05,
"loss": 1.1883,
"step": 4780,
"task_loss": 1.063377857208252
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1784594058990479,
"epoch": 1.73,
"learning_rate": 5.783701404898518e-05,
"loss": 1.1611,
"step": 4790,
"task_loss": 0.8253315091133118
},
{
"compression_loss": 0.0,
"distillation_loss": 1.212900996208191,
"epoch": 1.73,
"learning_rate": 5.781579917502926e-05,
"loss": 1.2314,
"step": 4800,
"task_loss": 0.9298550486564636
},
{
"compression_loss": 0.0,
"distillation_loss": 1.448960542678833,
"epoch": 1.74,
"learning_rate": 5.7794484698550484e-05,
"loss": 1.1093,
"step": 4810,
"task_loss": 1.1630914211273193
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6462247371673584,
"epoch": 1.74,
"learning_rate": 5.777307069587152e-05,
"loss": 1.1482,
"step": 4820,
"task_loss": 1.3052637577056885
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3470487594604492,
"epoch": 1.75,
"learning_rate": 5.775155724367138e-05,
"loss": 1.0766,
"step": 4830,
"task_loss": 1.4758813381195068
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2199983596801758,
"epoch": 1.75,
"learning_rate": 5.7729944418985225e-05,
"loss": 1.1171,
"step": 4840,
"task_loss": 0.9905753135681152
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9857596755027771,
"epoch": 1.75,
"learning_rate": 5.770823229920403e-05,
"loss": 1.1234,
"step": 4850,
"task_loss": 1.0089142322540283
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3423244953155518,
"epoch": 1.76,
"learning_rate": 5.7686420962074325e-05,
"loss": 1.0579,
"step": 4860,
"task_loss": 0.8072549104690552
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5363335609436035,
"epoch": 1.76,
"learning_rate": 5.766451048569792e-05,
"loss": 1.2695,
"step": 4870,
"task_loss": 1.405518889427185
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8228256702423096,
"epoch": 1.76,
"learning_rate": 5.7642500948531614e-05,
"loss": 1.1095,
"step": 4880,
"task_loss": 0.8263888955116272
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6186543703079224,
"epoch": 1.77,
"learning_rate": 5.762039242938693e-05,
"loss": 1.2104,
"step": 4890,
"task_loss": 1.1083335876464844
},
{
"compression_loss": 0.0,
"distillation_loss": 1.022383451461792,
"epoch": 1.77,
"learning_rate": 5.759818500742981e-05,
"loss": 1.139,
"step": 4900,
"task_loss": 0.8983309268951416
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3909627199172974,
"epoch": 1.77,
"learning_rate": 5.757587876218039e-05,
"loss": 1.1174,
"step": 4910,
"task_loss": 1.272632360458374
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5401933193206787,
"epoch": 1.78,
"learning_rate": 5.755347377351262e-05,
"loss": 1.1882,
"step": 4920,
"task_loss": 1.1316332817077637
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4937539100646973,
"epoch": 1.78,
"learning_rate": 5.753097012165404e-05,
"loss": 1.1599,
"step": 4930,
"task_loss": 1.5117436647415161
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2138440608978271,
"epoch": 1.79,
"learning_rate": 5.750836788718551e-05,
"loss": 1.0335,
"step": 4940,
"task_loss": 1.3014496564865112
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9365776777267456,
"epoch": 1.79,
"learning_rate": 5.748566715104086e-05,
"loss": 1.1542,
"step": 4950,
"task_loss": 0.8453970551490784
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6637686491012573,
"epoch": 1.79,
"learning_rate": 5.746286799450667e-05,
"loss": 1.14,
"step": 4960,
"task_loss": 1.3181530237197876
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5403568744659424,
"epoch": 1.8,
"learning_rate": 5.743997049922189e-05,
"loss": 1.3815,
"step": 4970,
"task_loss": 1.4503953456878662
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2465837001800537,
"epoch": 1.8,
"learning_rate": 5.741697474717765e-05,
"loss": 1.0925,
"step": 4980,
"task_loss": 0.894507646560669
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8164676427841187,
"epoch": 1.8,
"learning_rate": 5.73938808207169e-05,
"loss": 1.178,
"step": 4990,
"task_loss": 0.6441599130630493
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4209399223327637,
"epoch": 1.81,
"learning_rate": 5.737068880253413e-05,
"loss": 1.1209,
"step": 5000,
"task_loss": 1.1921095848083496
},
{
"epoch": 1.81,
"eval_exact_match": 79.59318826868495,
"eval_f1": 87.62692030696158,
"step": 5000
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5741052627563477,
"epoch": 1.81,
"learning_rate": 5.7347398775675064e-05,
"loss": 1.1256,
"step": 5010,
"task_loss": 0.9303328990936279
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1673991680145264,
"epoch": 1.81,
"learning_rate": 5.7324010823536405e-05,
"loss": 1.2911,
"step": 5020,
"task_loss": 1.5069319009780884
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5451232194900513,
"epoch": 1.82,
"learning_rate": 5.730052502986547e-05,
"loss": 1.2214,
"step": 5030,
"task_loss": 1.338415503501892
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8016589879989624,
"epoch": 1.82,
"learning_rate": 5.727694147875996e-05,
"loss": 1.2434,
"step": 5040,
"task_loss": 0.6144353151321411
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4259705543518066,
"epoch": 1.83,
"learning_rate": 5.725326025466759e-05,
"loss": 1.231,
"step": 5050,
"task_loss": 1.2222867012023926
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8470112085342407,
"epoch": 1.83,
"learning_rate": 5.722948144238586e-05,
"loss": 1.0969,
"step": 5060,
"task_loss": 1.0450043678283691
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1960935592651367,
"epoch": 1.83,
"learning_rate": 5.7205605127061685e-05,
"loss": 1.2254,
"step": 5070,
"task_loss": 0.9947346448898315
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4478334188461304,
"epoch": 1.84,
"learning_rate": 5.718163139419111e-05,
"loss": 1.2485,
"step": 5080,
"task_loss": 1.0019992589950562
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9755243062973022,
"epoch": 1.84,
"learning_rate": 5.7157560329619036e-05,
"loss": 1.0562,
"step": 5090,
"task_loss": 1.1016393899917603
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9732208251953125,
"epoch": 1.84,
"learning_rate": 5.7133392019538904e-05,
"loss": 1.0489,
"step": 5100,
"task_loss": 1.0136487483978271
},
{
"compression_loss": 0.0,
"distillation_loss": 1.704603910446167,
"epoch": 1.85,
"learning_rate": 5.7109126550492306e-05,
"loss": 1.3115,
"step": 5110,
"task_loss": 1.5213062763214111
},
{
"compression_loss": 0.0,
"distillation_loss": 0.960157573223114,
"epoch": 1.85,
"learning_rate": 5.70847640093688e-05,
"loss": 0.987,
"step": 5120,
"task_loss": 0.6706898212432861
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2005268335342407,
"epoch": 1.85,
"learning_rate": 5.706030448340552e-05,
"loss": 1.1784,
"step": 5130,
"task_loss": 1.5834624767303467
},
{
"compression_loss": 0.0,
"distillation_loss": 1.155214548110962,
"epoch": 1.86,
"learning_rate": 5.7035748060186886e-05,
"loss": 1.2087,
"step": 5140,
"task_loss": 1.0313799381256104
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6764129996299744,
"epoch": 1.86,
"learning_rate": 5.701109482764426e-05,
"loss": 1.2403,
"step": 5150,
"task_loss": 1.207790493965149
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2213506698608398,
"epoch": 1.86,
"learning_rate": 5.69863448740557e-05,
"loss": 1.1657,
"step": 5160,
"task_loss": 0.9657225608825684
},
{
"compression_loss": 0.0,
"distillation_loss": 1.703071117401123,
"epoch": 1.87,
"learning_rate": 5.6961498288045576e-05,
"loss": 1.2662,
"step": 5170,
"task_loss": 1.522236704826355
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8341003656387329,
"epoch": 1.87,
"learning_rate": 5.6936555158584276e-05,
"loss": 1.121,
"step": 5180,
"task_loss": 1.0218309164047241
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0469675064086914,
"epoch": 1.88,
"learning_rate": 5.6911515574987906e-05,
"loss": 1.2055,
"step": 5190,
"task_loss": 0.9404164552688599
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4735569953918457,
"epoch": 1.88,
"learning_rate": 5.688637962691794e-05,
"loss": 1.0771,
"step": 5200,
"task_loss": 1.57379150390625
},
{
"compression_loss": 0.0,
"distillation_loss": 1.191293478012085,
"epoch": 1.88,
"learning_rate": 5.6861147404380914e-05,
"loss": 1.0159,
"step": 5210,
"task_loss": 0.8541813492774963
},
{
"compression_loss": 0.0,
"distillation_loss": 0.772650957107544,
"epoch": 1.89,
"learning_rate": 5.6835818997728116e-05,
"loss": 1.1434,
"step": 5220,
"task_loss": 0.8448182344436646
},
{
"compression_loss": 0.0,
"distillation_loss": 1.254989743232727,
"epoch": 1.89,
"learning_rate": 5.6810394497655246e-05,
"loss": 1.0368,
"step": 5230,
"task_loss": 1.1865497827529907
},
{
"compression_loss": 0.0,
"distillation_loss": 1.007494568824768,
"epoch": 1.89,
"learning_rate": 5.678487399520206e-05,
"loss": 1.0729,
"step": 5240,
"task_loss": 0.6971535086631775
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3780134916305542,
"epoch": 1.9,
"learning_rate": 5.6759257581752135e-05,
"loss": 1.1506,
"step": 5250,
"task_loss": 0.8896455764770508
},
{
"epoch": 1.9,
"eval_exact_match": 80.1608325449385,
"eval_f1": 87.85411242609273,
"step": 5250
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5140453577041626,
"epoch": 1.9,
"learning_rate": 5.673354534903244e-05,
"loss": 1.2356,
"step": 5260,
"task_loss": 1.2429227828979492
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8260661959648132,
"epoch": 1.9,
"learning_rate": 5.670773738911308e-05,
"loss": 1.1357,
"step": 5270,
"task_loss": 0.6863052845001221
},
{
"compression_loss": 0.0,
"distillation_loss": 1.963780164718628,
"epoch": 1.91,
"learning_rate": 5.668183379440692e-05,
"loss": 1.334,
"step": 5280,
"task_loss": 1.6351208686828613
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0707981586456299,
"epoch": 1.91,
"learning_rate": 5.665583465766929e-05,
"loss": 1.1792,
"step": 5290,
"task_loss": 1.136970043182373
},
{
"compression_loss": 0.0,
"distillation_loss": 1.8824594020843506,
"epoch": 1.92,
"learning_rate": 5.662974007199761e-05,
"loss": 1.1516,
"step": 5300,
"task_loss": 1.401637077331543
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5174405574798584,
"epoch": 1.92,
"learning_rate": 5.660355013083112e-05,
"loss": 1.2431,
"step": 5310,
"task_loss": 1.5356242656707764
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1089816093444824,
"epoch": 1.92,
"learning_rate": 5.657726492795047e-05,
"loss": 1.2988,
"step": 5320,
"task_loss": 0.8448818922042847
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6607980728149414,
"epoch": 1.93,
"learning_rate": 5.655088455747745e-05,
"loss": 1.1833,
"step": 5330,
"task_loss": 1.336568832397461
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1899293661117554,
"epoch": 1.93,
"learning_rate": 5.6524409113874617e-05,
"loss": 1.0574,
"step": 5340,
"task_loss": 0.9695686101913452
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5410118103027344,
"epoch": 1.93,
"learning_rate": 5.649783869194495e-05,
"loss": 1.176,
"step": 5350,
"task_loss": 1.28522527217865
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2499977350234985,
"epoch": 1.94,
"learning_rate": 5.647117338683155e-05,
"loss": 1.1411,
"step": 5360,
"task_loss": 1.0919628143310547
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4456626176834106,
"epoch": 1.94,
"learning_rate": 5.6444413294017266e-05,
"loss": 1.204,
"step": 5370,
"task_loss": 0.9402436017990112
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0688556432724,
"epoch": 1.94,
"learning_rate": 5.641755850932434e-05,
"loss": 1.0496,
"step": 5380,
"task_loss": 0.4863385856151581
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1923037767410278,
"epoch": 1.95,
"learning_rate": 5.639060912891412e-05,
"loss": 1.3074,
"step": 5390,
"task_loss": 1.196105718612671
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5121135711669922,
"epoch": 1.95,
"learning_rate": 5.636356524928666e-05,
"loss": 1.2095,
"step": 5400,
"task_loss": 1.1289417743682861
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0193203687667847,
"epoch": 1.96,
"learning_rate": 5.6336426967280403e-05,
"loss": 1.0156,
"step": 5410,
"task_loss": 0.6196513175964355
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1136207580566406,
"epoch": 1.96,
"learning_rate": 5.6309194380071825e-05,
"loss": 1.0743,
"step": 5420,
"task_loss": 0.920132577419281
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0087127685546875,
"epoch": 1.96,
"learning_rate": 5.6281867585175094e-05,
"loss": 1.1059,
"step": 5430,
"task_loss": 0.8849406242370605
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9823505282402039,
"epoch": 1.97,
"learning_rate": 5.62544466804417e-05,
"loss": 1.1327,
"step": 5440,
"task_loss": 1.1544190645217896
},
{
"compression_loss": 0.0,
"distillation_loss": 0.929497480392456,
"epoch": 1.97,
"learning_rate": 5.622693176406015e-05,
"loss": 1.2371,
"step": 5450,
"task_loss": 0.6615207195281982
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8319283723831177,
"epoch": 1.97,
"learning_rate": 5.619932293455555e-05,
"loss": 1.1588,
"step": 5460,
"task_loss": 0.6978183388710022
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0234538316726685,
"epoch": 1.98,
"learning_rate": 5.617162029078931e-05,
"loss": 1.2865,
"step": 5470,
"task_loss": 1.0632944107055664
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2524006366729736,
"epoch": 1.98,
"learning_rate": 5.614382393195878e-05,
"loss": 1.1138,
"step": 5480,
"task_loss": 0.9473165273666382
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9574975967407227,
"epoch": 1.98,
"learning_rate": 5.611593395759687e-05,
"loss": 1.1326,
"step": 5490,
"task_loss": 0.6084072589874268
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1083191633224487,
"epoch": 1.99,
"learning_rate": 5.6087950467571686e-05,
"loss": 1.0398,
"step": 5500,
"task_loss": 1.5072331428527832
},
{
"epoch": 1.99,
"eval_exact_match": 79.93377483443709,
"eval_f1": 87.66760484249721,
"step": 5500
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1833401918411255,
"epoch": 1.99,
"learning_rate": 5.605987356208624e-05,
"loss": 1.1425,
"step": 5510,
"task_loss": 0.9314687252044678
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9431557059288025,
"epoch": 1.99,
"learning_rate": 5.603170334167802e-05,
"loss": 1.1334,
"step": 5520,
"task_loss": 0.8197354078292847
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1592495441436768,
"epoch": 2.0,
"learning_rate": 5.6003439907218656e-05,
"loss": 1.2719,
"step": 5530,
"task_loss": 1.1633968353271484
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9738726615905762,
"epoch": 2.0,
"learning_rate": 5.597508335991354e-05,
"loss": 1.0092,
"step": 5540,
"task_loss": 0.6517045497894287
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9099946022033691,
"epoch": 2.01,
"learning_rate": 5.594663380130153e-05,
"loss": 0.9207,
"step": 5550,
"task_loss": 0.8611887693405151
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8894250392913818,
"epoch": 2.01,
"learning_rate": 5.591809133325448e-05,
"loss": 0.8773,
"step": 5560,
"task_loss": 0.7208254933357239
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1827757358551025,
"epoch": 2.01,
"learning_rate": 5.588945605797698e-05,
"loss": 0.8743,
"step": 5570,
"task_loss": 1.253017783164978
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9436140060424805,
"epoch": 2.02,
"learning_rate": 5.5860728078005916e-05,
"loss": 0.983,
"step": 5580,
"task_loss": 0.6787883639335632
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3159337043762207,
"epoch": 2.02,
"learning_rate": 5.583190749621014e-05,
"loss": 0.9276,
"step": 5590,
"task_loss": 1.879063367843628
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7113268375396729,
"epoch": 2.02,
"learning_rate": 5.580299441579008e-05,
"loss": 0.8873,
"step": 5600,
"task_loss": 1.2208278179168701
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8449651002883911,
"epoch": 2.03,
"learning_rate": 5.5773988940277416e-05,
"loss": 0.8183,
"step": 5610,
"task_loss": 0.7712351083755493
},
{
"compression_loss": 0.0,
"distillation_loss": 1.184571385383606,
"epoch": 2.03,
"learning_rate": 5.574489117353463e-05,
"loss": 0.8997,
"step": 5620,
"task_loss": 1.2251086235046387
},
{
"compression_loss": 0.0,
"distillation_loss": 1.027880311012268,
"epoch": 2.03,
"learning_rate": 5.571570121975472e-05,
"loss": 0.8811,
"step": 5630,
"task_loss": 1.2804369926452637
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5541472434997559,
"epoch": 2.04,
"learning_rate": 5.568641918346074e-05,
"loss": 1.0087,
"step": 5640,
"task_loss": 1.7716798782348633
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9887833595275879,
"epoch": 2.04,
"learning_rate": 5.565704516950552e-05,
"loss": 0.9056,
"step": 5650,
"task_loss": 1.108952283859253
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8718394041061401,
"epoch": 2.05,
"learning_rate": 5.562757928307121e-05,
"loss": 0.8578,
"step": 5660,
"task_loss": 0.7642139196395874
},
{
"compression_loss": 0.0,
"distillation_loss": 1.6347434520721436,
"epoch": 2.05,
"learning_rate": 5.559802162966897e-05,
"loss": 0.9785,
"step": 5670,
"task_loss": 1.5460071563720703
},
{
"compression_loss": 0.0,
"distillation_loss": 1.013885736465454,
"epoch": 2.05,
"learning_rate": 5.556837231513852e-05,
"loss": 0.9215,
"step": 5680,
"task_loss": 1.1903676986694336
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8532087802886963,
"epoch": 2.06,
"learning_rate": 5.553863144564781e-05,
"loss": 0.9352,
"step": 5690,
"task_loss": 1.0140926837921143
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6742889881134033,
"epoch": 2.06,
"learning_rate": 5.550879912769264e-05,
"loss": 0.9399,
"step": 5700,
"task_loss": 0.7566931843757629
},
{
"compression_loss": 0.0,
"distillation_loss": 1.184865117073059,
"epoch": 2.06,
"learning_rate": 5.5478875468096265e-05,
"loss": 0.9145,
"step": 5710,
"task_loss": 1.0496134757995605
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8306396007537842,
"epoch": 2.07,
"learning_rate": 5.5448860574009015e-05,
"loss": 0.8817,
"step": 5720,
"task_loss": 0.7623034715652466
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2012348175048828,
"epoch": 2.07,
"learning_rate": 5.5418754552907905e-05,
"loss": 0.9289,
"step": 5730,
"task_loss": 1.1866190433502197
},
{
"compression_loss": 0.0,
"distillation_loss": 1.015893578529358,
"epoch": 2.07,
"learning_rate": 5.5388557512596255e-05,
"loss": 0.8678,
"step": 5740,
"task_loss": 0.9989817142486572
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8217979669570923,
"epoch": 2.08,
"learning_rate": 5.535826956120332e-05,
"loss": 0.9298,
"step": 5750,
"task_loss": 1.2064638137817383
},
{
"epoch": 2.08,
"eval_exact_match": 79.97161778618732,
"eval_f1": 88.04204326637895,
"step": 5750
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9215903878211975,
"epoch": 2.08,
"learning_rate": 5.532789080718388e-05,
"loss": 0.9191,
"step": 5760,
"task_loss": 1.0276141166687012
},
{
"compression_loss": 0.0,
"distillation_loss": 1.038353681564331,
"epoch": 2.09,
"learning_rate": 5.5297421359317855e-05,
"loss": 0.8936,
"step": 5770,
"task_loss": 1.220682144165039
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6983292102813721,
"epoch": 2.09,
"learning_rate": 5.5266861326709916e-05,
"loss": 0.93,
"step": 5780,
"task_loss": 1.0011334419250488
},
{
"compression_loss": 0.0,
"distillation_loss": 1.005040168762207,
"epoch": 2.09,
"learning_rate": 5.5236210818789134e-05,
"loss": 0.9048,
"step": 5790,
"task_loss": 0.8836946487426758
},
{
"compression_loss": 0.0,
"distillation_loss": 1.034933090209961,
"epoch": 2.1,
"learning_rate": 5.52054699453085e-05,
"loss": 0.8101,
"step": 5800,
"task_loss": 0.8946719765663147
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0742028951644897,
"epoch": 2.1,
"learning_rate": 5.5174638816344634e-05,
"loss": 1.0352,
"step": 5810,
"task_loss": 0.7151690721511841
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9790550470352173,
"epoch": 2.1,
"learning_rate": 5.514371754229731e-05,
"loss": 0.8449,
"step": 5820,
"task_loss": 0.990552544593811
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0028564929962158,
"epoch": 2.11,
"learning_rate": 5.5112706233889095e-05,
"loss": 0.8105,
"step": 5830,
"task_loss": 0.9065827131271362
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9125075340270996,
"epoch": 2.11,
"learning_rate": 5.508160500216497e-05,
"loss": 0.7907,
"step": 5840,
"task_loss": 0.7284402251243591
},
{
"compression_loss": 0.0,
"distillation_loss": 1.029099464416504,
"epoch": 2.11,
"learning_rate": 5.50504139584919e-05,
"loss": 1.1059,
"step": 5850,
"task_loss": 0.6841808557510376
},
{
"compression_loss": 0.0,
"distillation_loss": 1.064493179321289,
"epoch": 2.12,
"learning_rate": 5.5019133214558446e-05,
"loss": 1.0304,
"step": 5860,
"task_loss": 1.0819746255874634
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7983106374740601,
"epoch": 2.12,
"learning_rate": 5.4987762882374365e-05,
"loss": 0.8406,
"step": 5870,
"task_loss": 0.940750002861023
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7291630506515503,
"epoch": 2.13,
"learning_rate": 5.495630307427021e-05,
"loss": 0.8193,
"step": 5880,
"task_loss": 0.7505497932434082
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1587085723876953,
"epoch": 2.13,
"learning_rate": 5.492475390289695e-05,
"loss": 0.9177,
"step": 5890,
"task_loss": 1.006648302078247
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9014360904693604,
"epoch": 2.13,
"learning_rate": 5.489311548122551e-05,
"loss": 0.9625,
"step": 5900,
"task_loss": 0.8569836616516113
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8141177892684937,
"epoch": 2.14,
"learning_rate": 5.486138792254644e-05,
"loss": 0.9307,
"step": 5910,
"task_loss": 0.8875386714935303
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6669865250587463,
"epoch": 2.14,
"learning_rate": 5.482957134046943e-05,
"loss": 0.9236,
"step": 5920,
"task_loss": 1.001587152481079
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8241477012634277,
"epoch": 2.14,
"learning_rate": 5.479766584892297e-05,
"loss": 0.8674,
"step": 5930,
"task_loss": 1.012243628501892
},
{
"compression_loss": 0.0,
"distillation_loss": 0.938452959060669,
"epoch": 2.15,
"learning_rate": 5.476567156215392e-05,
"loss": 0.9174,
"step": 5940,
"task_loss": 0.5267655253410339
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8367009162902832,
"epoch": 2.15,
"learning_rate": 5.4733588594727075e-05,
"loss": 0.8657,
"step": 5950,
"task_loss": 0.5014982223510742
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9218536615371704,
"epoch": 2.15,
"learning_rate": 5.470141706152479e-05,
"loss": 0.9756,
"step": 5960,
"task_loss": 1.0196517705917358
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5319288372993469,
"epoch": 2.16,
"learning_rate": 5.466915707774656e-05,
"loss": 0.8654,
"step": 5970,
"task_loss": 0.6182997226715088
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7957490682601929,
"epoch": 2.16,
"learning_rate": 5.463680875890861e-05,
"loss": 0.8933,
"step": 5980,
"task_loss": 0.7909960746765137
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5595934391021729,
"epoch": 2.16,
"learning_rate": 5.460437222084344e-05,
"loss": 0.9095,
"step": 5990,
"task_loss": 0.6065921783447266
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3127511739730835,
"epoch": 2.17,
"learning_rate": 5.4571847579699476e-05,
"loss": 0.9389,
"step": 6000,
"task_loss": 1.0244468450546265
},
{
"epoch": 2.17,
"eval_exact_match": 80.4162724692526,
"eval_f1": 88.26245305245004,
"step": 6000
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9304770231246948,
"epoch": 2.17,
"learning_rate": 5.453923495194062e-05,
"loss": 0.8205,
"step": 6010,
"task_loss": 0.7821296453475952
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4573397636413574,
"epoch": 2.18,
"learning_rate": 5.4506534454345814e-05,
"loss": 0.9258,
"step": 6020,
"task_loss": 1.1528065204620361
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7485623359680176,
"epoch": 2.18,
"learning_rate": 5.4473746204008664e-05,
"loss": 1.008,
"step": 6030,
"task_loss": 0.8168891668319702
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9035183191299438,
"epoch": 2.18,
"learning_rate": 5.444087031833699e-05,
"loss": 0.8543,
"step": 6040,
"task_loss": 1.0606868267059326
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1698881387710571,
"epoch": 2.19,
"learning_rate": 5.4407906915052425e-05,
"loss": 0.8646,
"step": 6050,
"task_loss": 1.4439537525177002
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8415951132774353,
"epoch": 2.19,
"learning_rate": 5.437485611218999e-05,
"loss": 0.8619,
"step": 6060,
"task_loss": 0.7370737791061401
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1627850532531738,
"epoch": 2.19,
"learning_rate": 5.434171802809764e-05,
"loss": 0.9036,
"step": 6070,
"task_loss": 0.8056766390800476
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7362239956855774,
"epoch": 2.2,
"learning_rate": 5.430849278143587e-05,
"loss": 0.8756,
"step": 6080,
"task_loss": 1.0063245296478271
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8030550479888916,
"epoch": 2.2,
"learning_rate": 5.427518049117732e-05,
"loss": 0.9322,
"step": 6090,
"task_loss": 0.8871850967407227
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0586618185043335,
"epoch": 2.2,
"learning_rate": 5.4241781276606274e-05,
"loss": 0.8834,
"step": 6100,
"task_loss": 1.0324198007583618
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6400578022003174,
"epoch": 2.21,
"learning_rate": 5.4208295257318296e-05,
"loss": 0.8739,
"step": 6110,
"task_loss": 0.48888489603996277
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6222153306007385,
"epoch": 2.21,
"learning_rate": 5.417472255321977e-05,
"loss": 0.8171,
"step": 6120,
"task_loss": 0.6630902290344238
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6763343811035156,
"epoch": 2.22,
"learning_rate": 5.414106328452749e-05,
"loss": 0.9668,
"step": 6130,
"task_loss": 0.6360142827033997
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9001832008361816,
"epoch": 2.22,
"learning_rate": 5.4107317571768215e-05,
"loss": 0.9618,
"step": 6140,
"task_loss": 0.7632064819335938
},
{
"compression_loss": 0.0,
"distillation_loss": 1.07004714012146,
"epoch": 2.22,
"learning_rate": 5.407348553577823e-05,
"loss": 0.8878,
"step": 6150,
"task_loss": 1.4211241006851196
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8392785787582397,
"epoch": 2.23,
"learning_rate": 5.403956729770294e-05,
"loss": 0.9421,
"step": 6160,
"task_loss": 0.6807509660720825
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8789479732513428,
"epoch": 2.23,
"learning_rate": 5.4005562978996424e-05,
"loss": 0.9041,
"step": 6170,
"task_loss": 0.6013485193252563
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9375628232955933,
"epoch": 2.23,
"learning_rate": 5.3971472701420984e-05,
"loss": 1.005,
"step": 6180,
"task_loss": 0.9426907300949097
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8983448147773743,
"epoch": 2.24,
"learning_rate": 5.393729658704673e-05,
"loss": 0.8028,
"step": 6190,
"task_loss": 0.7798272371292114
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8339853882789612,
"epoch": 2.24,
"learning_rate": 5.3903034758251136e-05,
"loss": 0.8309,
"step": 6200,
"task_loss": 0.4220605194568634
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8184782266616821,
"epoch": 2.24,
"learning_rate": 5.386868733771861e-05,
"loss": 0.8682,
"step": 6210,
"task_loss": 0.509628415107727
},
{
"compression_loss": 0.0,
"distillation_loss": 1.024518609046936,
"epoch": 2.25,
"learning_rate": 5.383425444844001e-05,
"loss": 0.8848,
"step": 6220,
"task_loss": 1.1755342483520508
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6047682166099548,
"epoch": 2.25,
"learning_rate": 5.379973621371229e-05,
"loss": 0.8786,
"step": 6230,
"task_loss": 0.5626362562179565
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2282063961029053,
"epoch": 2.26,
"learning_rate": 5.3765132757137976e-05,
"loss": 0.9194,
"step": 6240,
"task_loss": 0.9038418531417847
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0795953273773193,
"epoch": 2.26,
"learning_rate": 5.373044420262477e-05,
"loss": 0.8973,
"step": 6250,
"task_loss": 1.1270602941513062
},
{
"epoch": 2.26,
"eval_exact_match": 80.32166508987702,
"eval_f1": 88.11121062075824,
"step": 6250
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9112297296524048,
"epoch": 2.26,
"learning_rate": 5.369567067438508e-05,
"loss": 0.8557,
"step": 6260,
"task_loss": 1.6011927127838135
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8365263938903809,
"epoch": 2.27,
"learning_rate": 5.3660812296935584e-05,
"loss": 0.8108,
"step": 6270,
"task_loss": 0.6560771465301514
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1342790126800537,
"epoch": 2.27,
"learning_rate": 5.362586919509681e-05,
"loss": 0.9093,
"step": 6280,
"task_loss": 1.2042913436889648
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9722185134887695,
"epoch": 2.27,
"learning_rate": 5.359084149399264e-05,
"loss": 0.8678,
"step": 6290,
"task_loss": 0.9090611934661865
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0596710443496704,
"epoch": 2.28,
"learning_rate": 5.355572931904991e-05,
"loss": 0.9807,
"step": 6300,
"task_loss": 1.3353559970855713
},
{
"compression_loss": 0.0,
"distillation_loss": 1.045316457748413,
"epoch": 2.28,
"learning_rate": 5.3520532795997916e-05,
"loss": 0.8632,
"step": 6310,
"task_loss": 0.6300460696220398
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8498081564903259,
"epoch": 2.28,
"learning_rate": 5.348525205086801e-05,
"loss": 0.9675,
"step": 6320,
"task_loss": 0.6726857423782349
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9325256943702698,
"epoch": 2.29,
"learning_rate": 5.344988720999312e-05,
"loss": 0.9498,
"step": 6330,
"task_loss": 0.6439615488052368
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8513062000274658,
"epoch": 2.29,
"learning_rate": 5.3414438400007276e-05,
"loss": 0.8753,
"step": 6340,
"task_loss": 0.7384096384048462
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8684785962104797,
"epoch": 2.29,
"learning_rate": 5.337890574784523e-05,
"loss": 0.8852,
"step": 6350,
"task_loss": 0.7421172261238098
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8197358846664429,
"epoch": 2.3,
"learning_rate": 5.334328938074193e-05,
"loss": 0.8828,
"step": 6360,
"task_loss": 0.8697389364242554
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9179954528808594,
"epoch": 2.3,
"learning_rate": 5.3307589426232076e-05,
"loss": 0.8377,
"step": 6370,
"task_loss": 1.053146481513977
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9556145071983337,
"epoch": 2.31,
"learning_rate": 5.327180601214971e-05,
"loss": 0.8407,
"step": 6380,
"task_loss": 0.9011473655700684
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7499949932098389,
"epoch": 2.31,
"learning_rate": 5.323593926662768e-05,
"loss": 0.9696,
"step": 6390,
"task_loss": 0.6749441623687744
},
{
"compression_loss": 0.0,
"distillation_loss": 1.013110876083374,
"epoch": 2.31,
"learning_rate": 5.319998931809728e-05,
"loss": 0.9384,
"step": 6400,
"task_loss": 0.854491651058197
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0834851264953613,
"epoch": 2.32,
"learning_rate": 5.316395629528771e-05,
"loss": 0.7752,
"step": 6410,
"task_loss": 1.330308198928833
},
{
"compression_loss": 0.0,
"distillation_loss": 1.5485963821411133,
"epoch": 2.32,
"learning_rate": 5.312784032722561e-05,
"loss": 0.9658,
"step": 6420,
"task_loss": 1.17985200881958
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8666309118270874,
"epoch": 2.32,
"learning_rate": 5.309164154323469e-05,
"loss": 0.9012,
"step": 6430,
"task_loss": 0.88444983959198
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6017141938209534,
"epoch": 2.33,
"learning_rate": 5.305536007293517e-05,
"loss": 0.9155,
"step": 6440,
"task_loss": 0.6628368496894836
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7634067535400391,
"epoch": 2.33,
"learning_rate": 5.301899604624336e-05,
"loss": 0.8433,
"step": 6450,
"task_loss": 0.7978712320327759
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9220677614212036,
"epoch": 2.33,
"learning_rate": 5.298254959337119e-05,
"loss": 0.8675,
"step": 6460,
"task_loss": 1.0927445888519287
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6314671039581299,
"epoch": 2.34,
"learning_rate": 5.294602084482573e-05,
"loss": 0.8681,
"step": 6470,
"task_loss": 0.41532278060913086
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7226696014404297,
"epoch": 2.34,
"learning_rate": 5.2909409931408735e-05,
"loss": 0.9045,
"step": 6480,
"task_loss": 0.8908495306968689
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8101774454116821,
"epoch": 2.35,
"learning_rate": 5.28727169842162e-05,
"loss": 0.9288,
"step": 6490,
"task_loss": 0.5963428020477295
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7302408814430237,
"epoch": 2.35,
"learning_rate": 5.283594213463783e-05,
"loss": 0.7634,
"step": 6500,
"task_loss": 0.6015718579292297
},
{
"epoch": 2.35,
"eval_exact_match": 80.53926206244087,
"eval_f1": 88.24102075665736,
"step": 6500
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8358856439590454,
"epoch": 2.35,
"learning_rate": 5.279908551435662e-05,
"loss": 0.8888,
"step": 6510,
"task_loss": 0.5410736799240112
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7577072381973267,
"epoch": 2.36,
"learning_rate": 5.276214725534839e-05,
"loss": 0.9435,
"step": 6520,
"task_loss": 0.9352724552154541
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6489107012748718,
"epoch": 2.36,
"learning_rate": 5.272512748988126e-05,
"loss": 0.8791,
"step": 6530,
"task_loss": 0.7899311780929565
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6359513998031616,
"epoch": 2.36,
"learning_rate": 5.268802635051522e-05,
"loss": 0.7681,
"step": 6540,
"task_loss": 0.3422088027000427
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0363128185272217,
"epoch": 2.37,
"learning_rate": 5.265084397010164e-05,
"loss": 0.9111,
"step": 6550,
"task_loss": 1.0027375221252441
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0372557640075684,
"epoch": 2.37,
"learning_rate": 5.26135804817828e-05,
"loss": 0.8456,
"step": 6560,
"task_loss": 1.0056697130203247
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9141188263893127,
"epoch": 2.37,
"learning_rate": 5.257623601899141e-05,
"loss": 0.8281,
"step": 6570,
"task_loss": 1.3693073987960815
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7791051864624023,
"epoch": 2.38,
"learning_rate": 5.253881071545013e-05,
"loss": 0.8698,
"step": 6580,
"task_loss": 0.7371783256530762
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7637522220611572,
"epoch": 2.38,
"learning_rate": 5.25013047051711e-05,
"loss": 0.8717,
"step": 6590,
"task_loss": 0.41452422738075256
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6556802988052368,
"epoch": 2.39,
"learning_rate": 5.2463718122455444e-05,
"loss": 0.8335,
"step": 6600,
"task_loss": 0.6950621604919434
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9156330823898315,
"epoch": 2.39,
"learning_rate": 5.242605110189281e-05,
"loss": 0.9741,
"step": 6610,
"task_loss": 0.9320003986358643
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6790286302566528,
"epoch": 2.39,
"learning_rate": 5.2388303778360865e-05,
"loss": 0.8083,
"step": 6620,
"task_loss": 0.5760271549224854
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9703154563903809,
"epoch": 2.4,
"learning_rate": 5.235047628702483e-05,
"loss": 0.9415,
"step": 6630,
"task_loss": 0.8003017902374268
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7981125712394714,
"epoch": 2.4,
"learning_rate": 5.2312568763336995e-05,
"loss": 1.0161,
"step": 6640,
"task_loss": 0.6487212181091309
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8091559410095215,
"epoch": 2.4,
"learning_rate": 5.227458134303623e-05,
"loss": 0.8289,
"step": 6650,
"task_loss": 1.0017086267471313
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7591532468795776,
"epoch": 2.41,
"learning_rate": 5.223651416214749e-05,
"loss": 0.8141,
"step": 6660,
"task_loss": 0.4736844599246979
},
{
"compression_loss": 0.0,
"distillation_loss": 0.658312201499939,
"epoch": 2.41,
"learning_rate": 5.2198367356981325e-05,
"loss": 0.8889,
"step": 6670,
"task_loss": 0.6780068278312683
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9467959403991699,
"epoch": 2.41,
"learning_rate": 5.2160141064133426e-05,
"loss": 0.8872,
"step": 6680,
"task_loss": 1.043292760848999
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7465643882751465,
"epoch": 2.42,
"learning_rate": 5.2125669551728134e-05,
"loss": 0.7908,
"step": 6690,
"task_loss": 0.5276561975479126
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8351773023605347,
"epoch": 2.42,
"learning_rate": 5.208729260962506e-05,
"loss": 0.9061,
"step": 6700,
"task_loss": 0.9505938291549683
},
{
"compression_loss": 0.0,
"distillation_loss": 0.628624439239502,
"epoch": 2.43,
"learning_rate": 5.204883657757553e-05,
"loss": 0.879,
"step": 6710,
"task_loss": 0.8255483508110046
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6216802000999451,
"epoch": 2.43,
"learning_rate": 5.201030159328252e-05,
"loss": 0.8161,
"step": 6720,
"task_loss": 0.7303711175918579
},
{
"compression_loss": 0.0,
"distillation_loss": 1.187577247619629,
"epoch": 2.43,
"learning_rate": 5.1971687794731685e-05,
"loss": 0.8769,
"step": 6730,
"task_loss": 0.9617449045181274
},
{
"compression_loss": 0.0,
"distillation_loss": 1.022446870803833,
"epoch": 2.44,
"learning_rate": 5.193299532019094e-05,
"loss": 0.9059,
"step": 6740,
"task_loss": 1.0978424549102783
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7576553821563721,
"epoch": 2.44,
"learning_rate": 5.189422430820986e-05,
"loss": 0.9146,
"step": 6750,
"task_loss": 0.9070881605148315
},
{
"epoch": 2.44,
"eval_exact_match": 80.6244087038789,
"eval_f1": 88.24355008399391,
"step": 6750
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3398449420928955,
"epoch": 2.44,
"learning_rate": 5.185537489761931e-05,
"loss": 1.0719,
"step": 6760,
"task_loss": 1.251268744468689
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8856955766677856,
"epoch": 2.45,
"learning_rate": 5.181644722753083e-05,
"loss": 0.8418,
"step": 6770,
"task_loss": 0.7368022799491882
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0002448558807373,
"epoch": 2.45,
"learning_rate": 5.177744143733622e-05,
"loss": 0.8679,
"step": 6780,
"task_loss": 1.0230883359909058
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0123604536056519,
"epoch": 2.45,
"learning_rate": 5.173835766670701e-05,
"loss": 0.9899,
"step": 6790,
"task_loss": 1.1285487413406372
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8969544172286987,
"epoch": 2.46,
"learning_rate": 5.1699196055593954e-05,
"loss": 0.9321,
"step": 6800,
"task_loss": 0.9808671474456787
},
{
"compression_loss": 0.0,
"distillation_loss": 1.399101734161377,
"epoch": 2.46,
"learning_rate": 5.165995674422654e-05,
"loss": 1.0175,
"step": 6810,
"task_loss": 0.9696444272994995
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7293881177902222,
"epoch": 2.46,
"learning_rate": 5.162063987311249e-05,
"loss": 0.9052,
"step": 6820,
"task_loss": 0.8145143389701843
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7391337156295776,
"epoch": 2.47,
"learning_rate": 5.158124558303723e-05,
"loss": 0.7945,
"step": 6830,
"task_loss": 0.7841509580612183
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6791837215423584,
"epoch": 2.47,
"learning_rate": 5.1541774015063435e-05,
"loss": 0.8292,
"step": 6840,
"task_loss": 0.6428923606872559
},
{
"compression_loss": 0.0,
"distillation_loss": 0.803602397441864,
"epoch": 2.48,
"learning_rate": 5.150222531053048e-05,
"loss": 0.8626,
"step": 6850,
"task_loss": 0.8221435546875
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7116631269454956,
"epoch": 2.48,
"learning_rate": 5.146259961105396e-05,
"loss": 0.9257,
"step": 6860,
"task_loss": 0.5078368186950684
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9278137683868408,
"epoch": 2.48,
"learning_rate": 5.142289705852514e-05,
"loss": 0.9753,
"step": 6870,
"task_loss": 1.4697847366333008
},
{
"compression_loss": 0.0,
"distillation_loss": 0.93769770860672,
"epoch": 2.49,
"learning_rate": 5.138311779511054e-05,
"loss": 1.012,
"step": 6880,
"task_loss": 1.0208895206451416
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5001537799835205,
"epoch": 2.49,
"learning_rate": 5.134326196325131e-05,
"loss": 0.8302,
"step": 6890,
"task_loss": 0.9387691020965576
},
{
"compression_loss": 0.0,
"distillation_loss": 0.733636736869812,
"epoch": 2.49,
"learning_rate": 5.130332970566278e-05,
"loss": 0.822,
"step": 6900,
"task_loss": 0.7175207734107971
},
{
"compression_loss": 0.0,
"distillation_loss": 0.924873948097229,
"epoch": 2.5,
"learning_rate": 5.1263321165334e-05,
"loss": 1.0059,
"step": 6910,
"task_loss": 0.8183651566505432
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9852616190910339,
"epoch": 2.5,
"learning_rate": 5.122323648552711e-05,
"loss": 0.9519,
"step": 6920,
"task_loss": 1.253139853477478
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8422321081161499,
"epoch": 2.5,
"learning_rate": 5.11830758097769e-05,
"loss": 0.771,
"step": 6930,
"task_loss": 0.6653087139129639
},
{
"compression_loss": 0.0,
"distillation_loss": 1.179046392440796,
"epoch": 2.51,
"learning_rate": 5.114283928189032e-05,
"loss": 1.021,
"step": 6940,
"task_loss": 0.617653489112854
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5847468376159668,
"epoch": 2.51,
"learning_rate": 5.110252704594591e-05,
"loss": 0.8193,
"step": 6950,
"task_loss": 0.5160527229309082
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0966300964355469,
"epoch": 2.52,
"learning_rate": 5.106213924629328e-05,
"loss": 1.0079,
"step": 6960,
"task_loss": 0.8059948682785034
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7070465087890625,
"epoch": 2.52,
"learning_rate": 5.102167602755267e-05,
"loss": 0.9058,
"step": 6970,
"task_loss": 0.5223201513290405
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9722675681114197,
"epoch": 2.52,
"learning_rate": 5.0981137534614325e-05,
"loss": 0.8457,
"step": 6980,
"task_loss": 0.7618287801742554
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8757837414741516,
"epoch": 2.53,
"learning_rate": 5.094052391263807e-05,
"loss": 0.8939,
"step": 6990,
"task_loss": 0.8423348665237427
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3165154457092285,
"epoch": 2.53,
"learning_rate": 5.089983530705272e-05,
"loss": 0.8859,
"step": 7000,
"task_loss": 0.809664785861969
},
{
"epoch": 2.53,
"eval_exact_match": 80.70955534531693,
"eval_f1": 88.51734461953797,
"step": 7000
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7622497081756592,
"epoch": 2.53,
"learning_rate": 5.085907186355564e-05,
"loss": 0.8179,
"step": 7010,
"task_loss": 0.8336362242698669
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6575897932052612,
"epoch": 2.54,
"learning_rate": 5.081823372811212e-05,
"loss": 0.894,
"step": 7020,
"task_loss": 0.7306101322174072
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7808577418327332,
"epoch": 2.54,
"learning_rate": 5.0777321046954936e-05,
"loss": 0.8247,
"step": 7030,
"task_loss": 0.9445192217826843
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7908771634101868,
"epoch": 2.54,
"learning_rate": 5.073633396658378e-05,
"loss": 0.8793,
"step": 7040,
"task_loss": 1.0309138298034668
},
{
"compression_loss": 0.0,
"distillation_loss": 0.759590208530426,
"epoch": 2.55,
"learning_rate": 5.069527263376478e-05,
"loss": 0.8812,
"step": 7050,
"task_loss": 0.7510941028594971
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9497214555740356,
"epoch": 2.55,
"learning_rate": 5.06541371955299e-05,
"loss": 0.9211,
"step": 7060,
"task_loss": 1.1385433673858643
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8788970708847046,
"epoch": 2.56,
"learning_rate": 5.061292779917651e-05,
"loss": 0.9177,
"step": 7070,
"task_loss": 1.2690625190734863
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5018882751464844,
"epoch": 2.56,
"learning_rate": 5.0571644592266784e-05,
"loss": 0.898,
"step": 7080,
"task_loss": 0.3722462058067322
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0037760734558105,
"epoch": 2.56,
"learning_rate": 5.053028772262718e-05,
"loss": 0.9261,
"step": 7090,
"task_loss": 0.9891495704650879
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2450032234191895,
"epoch": 2.57,
"learning_rate": 5.048885733834797e-05,
"loss": 0.8834,
"step": 7100,
"task_loss": 1.3762762546539307
},
{
"compression_loss": 0.0,
"distillation_loss": 1.4217417240142822,
"epoch": 2.57,
"learning_rate": 5.044735358778261e-05,
"loss": 0.8716,
"step": 7110,
"task_loss": 1.2648154497146606
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0889158248901367,
"epoch": 2.57,
"learning_rate": 5.040577661954731e-05,
"loss": 0.9575,
"step": 7120,
"task_loss": 0.7835713624954224
},
{
"compression_loss": 0.0,
"distillation_loss": 0.707399845123291,
"epoch": 2.58,
"learning_rate": 5.0364126582520454e-05,
"loss": 0.9368,
"step": 7130,
"task_loss": 0.6754993200302124
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8570347428321838,
"epoch": 2.58,
"learning_rate": 5.0322403625842056e-05,
"loss": 0.8977,
"step": 7140,
"task_loss": 1.0160921812057495
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1405144929885864,
"epoch": 2.58,
"learning_rate": 5.028060789891323e-05,
"loss": 0.9357,
"step": 7150,
"task_loss": 1.246722936630249
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8279078006744385,
"epoch": 2.59,
"learning_rate": 5.0238739551395684e-05,
"loss": 0.8706,
"step": 7160,
"task_loss": 0.8700082302093506
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7681478261947632,
"epoch": 2.59,
"learning_rate": 5.019679873321117e-05,
"loss": 0.874,
"step": 7170,
"task_loss": 0.9047025442123413
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7837231159210205,
"epoch": 2.59,
"learning_rate": 5.0154785594540905e-05,
"loss": 0.7157,
"step": 7180,
"task_loss": 0.44350481033325195
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7090331315994263,
"epoch": 2.6,
"learning_rate": 5.0112700285825134e-05,
"loss": 0.897,
"step": 7190,
"task_loss": 0.5929510593414307
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7562023401260376,
"epoch": 2.6,
"learning_rate": 5.007054295776246e-05,
"loss": 0.8891,
"step": 7200,
"task_loss": 0.5599946975708008
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1266264915466309,
"epoch": 2.61,
"learning_rate": 5.002831376130942e-05,
"loss": 1.002,
"step": 7210,
"task_loss": 1.0086379051208496
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6372247338294983,
"epoch": 2.61,
"learning_rate": 4.9986012847679855e-05,
"loss": 0.8134,
"step": 7220,
"task_loss": 0.7226533889770508
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8293584585189819,
"epoch": 2.61,
"learning_rate": 4.9943640368344464e-05,
"loss": 0.9293,
"step": 7230,
"task_loss": 1.0355051755905151
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0152957439422607,
"epoch": 2.62,
"learning_rate": 4.990119647503016e-05,
"loss": 0.8569,
"step": 7240,
"task_loss": 1.2577180862426758
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9773650169372559,
"epoch": 2.62,
"learning_rate": 4.98586813197196e-05,
"loss": 0.8754,
"step": 7250,
"task_loss": 1.1356110572814941
},
{
"epoch": 2.62,
"eval_exact_match": 81.0879848628193,
"eval_f1": 88.62796907187332,
"step": 7250
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7708771824836731,
"epoch": 2.62,
"learning_rate": 4.9816095054650605e-05,
"loss": 0.8444,
"step": 7260,
"task_loss": 0.6651926636695862
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1072356700897217,
"epoch": 2.63,
"learning_rate": 4.977343783231563e-05,
"loss": 0.9954,
"step": 7270,
"task_loss": 1.019878625869751
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0349875688552856,
"epoch": 2.63,
"learning_rate": 4.9730709805461207e-05,
"loss": 0.8893,
"step": 7280,
"task_loss": 0.8438126444816589
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8099746704101562,
"epoch": 2.63,
"learning_rate": 4.96879111270874e-05,
"loss": 0.8308,
"step": 7290,
"task_loss": 0.702311635017395
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2747955322265625,
"epoch": 2.64,
"learning_rate": 4.964504195044729e-05,
"loss": 0.9728,
"step": 7300,
"task_loss": 1.0143351554870605
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1044409275054932,
"epoch": 2.64,
"learning_rate": 4.960210242904637e-05,
"loss": 0.9161,
"step": 7310,
"task_loss": 1.0179443359375
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9350137710571289,
"epoch": 2.65,
"learning_rate": 4.955909271664201e-05,
"loss": 0.7646,
"step": 7320,
"task_loss": 0.8446428775787354
},
{
"compression_loss": 0.0,
"distillation_loss": 1.1761360168457031,
"epoch": 2.65,
"learning_rate": 4.951601296724296e-05,
"loss": 0.9383,
"step": 7330,
"task_loss": 1.650334119796753
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2493460178375244,
"epoch": 2.65,
"learning_rate": 4.947286333510872e-05,
"loss": 0.8523,
"step": 7340,
"task_loss": 1.3208677768707275
},
{
"compression_loss": 0.0,
"distillation_loss": 1.021695852279663,
"epoch": 2.66,
"learning_rate": 4.942964397474906e-05,
"loss": 0.8172,
"step": 7350,
"task_loss": 0.7468795776367188
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8202105760574341,
"epoch": 2.66,
"learning_rate": 4.9386355040923396e-05,
"loss": 0.7931,
"step": 7360,
"task_loss": 0.6085814833641052
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8396191000938416,
"epoch": 2.66,
"learning_rate": 4.934299668864031e-05,
"loss": 0.9374,
"step": 7370,
"task_loss": 0.8447757959365845
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8239990472793579,
"epoch": 2.67,
"learning_rate": 4.929956907315692e-05,
"loss": 0.9837,
"step": 7380,
"task_loss": 0.9020982980728149
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9127852916717529,
"epoch": 2.67,
"learning_rate": 4.9256072349978375e-05,
"loss": 0.8494,
"step": 7390,
"task_loss": 0.6982762813568115
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0156875848770142,
"epoch": 2.67,
"learning_rate": 4.92125066748573e-05,
"loss": 0.8612,
"step": 7400,
"task_loss": 0.7961791753768921
},
{
"compression_loss": 0.0,
"distillation_loss": 1.091951847076416,
"epoch": 2.68,
"learning_rate": 4.916887220379319e-05,
"loss": 0.8624,
"step": 7410,
"task_loss": 0.8009432554244995
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8252087831497192,
"epoch": 2.68,
"learning_rate": 4.912516909303193e-05,
"loss": 0.8018,
"step": 7420,
"task_loss": 0.7994389533996582
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0799195766448975,
"epoch": 2.69,
"learning_rate": 4.9081397499065144e-05,
"loss": 0.8456,
"step": 7430,
"task_loss": 0.98964923620224
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9133539199829102,
"epoch": 2.69,
"learning_rate": 4.9037557578629696e-05,
"loss": 0.7018,
"step": 7440,
"task_loss": 1.0649144649505615
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7813083529472351,
"epoch": 2.69,
"learning_rate": 4.8993649488707116e-05,
"loss": 0.7919,
"step": 7450,
"task_loss": 0.7633917331695557
},
{
"compression_loss": 0.0,
"distillation_loss": 1.905107021331787,
"epoch": 2.7,
"learning_rate": 4.894967338652305e-05,
"loss": 0.9709,
"step": 7460,
"task_loss": 1.3844743967056274
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8429162502288818,
"epoch": 2.7,
"learning_rate": 4.890562942954664e-05,
"loss": 0.8252,
"step": 7470,
"task_loss": 0.8414809703826904
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7957346439361572,
"epoch": 2.7,
"learning_rate": 4.886151777549004e-05,
"loss": 0.7663,
"step": 7480,
"task_loss": 0.5705951452255249
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7642992734909058,
"epoch": 2.71,
"learning_rate": 4.8817338582307804e-05,
"loss": 0.9121,
"step": 7490,
"task_loss": 0.5456397533416748
},
{
"compression_loss": 0.0,
"distillation_loss": 0.651875376701355,
"epoch": 2.71,
"learning_rate": 4.877309200819631e-05,
"loss": 0.8589,
"step": 7500,
"task_loss": 1.0916081666946411
},
{
"epoch": 2.71,
"eval_exact_match": 81.42857142857143,
"eval_f1": 88.55823715684251,
"step": 7500
},
{
"compression_loss": 0.0,
"distillation_loss": 1.2253670692443848,
"epoch": 2.71,
"learning_rate": 4.872877821159325e-05,
"loss": 0.8689,
"step": 7510,
"task_loss": 0.8776388764381409
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7492198944091797,
"epoch": 2.72,
"learning_rate": 4.868439735117698e-05,
"loss": 0.907,
"step": 7520,
"task_loss": 0.6415897607803345
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6262823343276978,
"epoch": 2.72,
"learning_rate": 4.863994958586604e-05,
"loss": 0.8402,
"step": 7530,
"task_loss": 0.598420262336731
},
{
"compression_loss": 0.0,
"distillation_loss": 0.678829550743103,
"epoch": 2.72,
"learning_rate": 4.85954350748185e-05,
"loss": 0.8539,
"step": 7540,
"task_loss": 0.8512382507324219
},
{
"compression_loss": 0.0,
"distillation_loss": 0.43244946002960205,
"epoch": 2.73,
"learning_rate": 4.855085397743149e-05,
"loss": 0.913,
"step": 7550,
"task_loss": 0.4512510895729065
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9347466230392456,
"epoch": 2.73,
"learning_rate": 4.850620645334051e-05,
"loss": 1.0302,
"step": 7560,
"task_loss": 1.0103384256362915
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0060513019561768,
"epoch": 2.74,
"learning_rate": 4.8461492662418953e-05,
"loss": 1.0036,
"step": 7570,
"task_loss": 1.0222327709197998
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8404515981674194,
"epoch": 2.74,
"learning_rate": 4.8416712764777496e-05,
"loss": 0.9034,
"step": 7580,
"task_loss": 1.377656102180481
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6688359975814819,
"epoch": 2.74,
"learning_rate": 4.837186692076353e-05,
"loss": 0.7879,
"step": 7590,
"task_loss": 0.7730237245559692
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0158637762069702,
"epoch": 2.75,
"learning_rate": 4.832695529096059e-05,
"loss": 0.8966,
"step": 7600,
"task_loss": 0.9950785636901855
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8134995698928833,
"epoch": 2.75,
"learning_rate": 4.8281978036187764e-05,
"loss": 0.9182,
"step": 7610,
"task_loss": 0.6545308828353882
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5976283550262451,
"epoch": 2.75,
"learning_rate": 4.823693531749912e-05,
"loss": 0.8383,
"step": 7620,
"task_loss": 0.8224766254425049
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8086435794830322,
"epoch": 2.76,
"learning_rate": 4.8191827296183185e-05,
"loss": 0.9204,
"step": 7630,
"task_loss": 0.6123383641242981
},
{
"compression_loss": 0.0,
"distillation_loss": 0.7645212411880493,
"epoch": 2.76,
"learning_rate": 4.814665413376226e-05,
"loss": 0.783,
"step": 7640,
"task_loss": 0.8700419068336487
},
{
"compression_loss": 0.0,
"distillation_loss": 1.0566661357879639,
"epoch": 2.76,
"learning_rate": 4.8101415991991965e-05,
"loss": 0.8458,
"step": 7650,
"task_loss": 1.4529619216918945
},
{
"compression_loss": 0.0,
"distillation_loss": 1.050924301147461,
"epoch": 2.77,
"learning_rate": 4.805611303286053e-05,
"loss": 0.8807,
"step": 7660,
"task_loss": 1.085532784461975
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5178258419036865,
"epoch": 2.77,
"learning_rate": 4.801074541858835e-05,
"loss": 0.803,
"step": 7670,
"task_loss": 0.5137467384338379
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5854527354240417,
"epoch": 2.78,
"learning_rate": 4.7965313311627286e-05,
"loss": 0.8646,
"step": 7680,
"task_loss": 0.5222367644309998
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6712926030158997,
"epoch": 2.78,
"learning_rate": 4.7919816874660145e-05,
"loss": 0.8015,
"step": 7690,
"task_loss": 0.6108640432357788
},
{
"compression_loss": 0.0,
"distillation_loss": 0.8817957639694214,
"epoch": 2.78,
"learning_rate": 4.787425627060011e-05,
"loss": 0.8394,
"step": 7700,
"task_loss": 1.0321855545043945
},
{
"compression_loss": 0.0,
"distillation_loss": 1.3753621578216553,
"epoch": 2.79,
"learning_rate": 4.78286316625901e-05,
"loss": 0.9047,
"step": 7710,
"task_loss": 1.332418441772461
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6856953501701355,
"epoch": 2.79,
"learning_rate": 4.778294321400225e-05,
"loss": 0.8821,
"step": 7720,
"task_loss": 1.043306589126587
},
{
"compression_loss": 0.0,
"distillation_loss": 0.5687346458435059,
"epoch": 2.79,
"learning_rate": 4.773719108843727e-05,
"loss": 0.8,
"step": 7730,
"task_loss": 0.7144980430603027
},
{
"compression_loss": 0.0,
"distillation_loss": 0.6648460626602173,
"epoch": 2.8,
"learning_rate": 4.769137544972389e-05,
"loss": 0.7907,
"step": 7740,
"task_loss": 1.0056627988815308
},
{
"compression_loss": 0.0,
"distillation_loss": 0.9545556306838989,
"epoch": 2.8,
"learning_rate": 4.764549646191827e-05,
"loss": 0.8531,
"step": 7750,
"task_loss": 1.079555869102478
},
{
"epoch": 2.8,
"eval_exact_match": 81.42857142857143,
"eval_f1": 88.77005789642749,
"step": 7750
}
],
"max_steps": 55340,
"num_train_epochs": 20,
"total_flos": 5914832349173760.0,
"trial_name": null,
"trial_params": null
}