|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8008673653776652, |
|
"global_step": 7750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 13.863856315612793, |
|
"epoch": 0.0, |
|
"learning_rate": 1.5178894109143475e-07, |
|
"loss": 14.3579, |
|
"step": 10, |
|
"task_loss": 5.79541015625 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.529892921447754, |
|
"epoch": 0.01, |
|
"learning_rate": 3.6863028550777017e-07, |
|
"loss": 14.5251, |
|
"step": 20, |
|
"task_loss": 5.82769775390625 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.667880058288574, |
|
"epoch": 0.01, |
|
"learning_rate": 5.637874954824719e-07, |
|
"loss": 14.2874, |
|
"step": 30, |
|
"task_loss": 5.782501220703125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 16.32543182373047, |
|
"epoch": 0.01, |
|
"learning_rate": 7.806288398988074e-07, |
|
"loss": 14.482, |
|
"step": 40, |
|
"task_loss": 5.93621826171875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.878634452819824, |
|
"epoch": 0.02, |
|
"learning_rate": 9.974701843151429e-07, |
|
"loss": 14.0581, |
|
"step": 50, |
|
"task_loss": 5.87786865234375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.618017196655273, |
|
"epoch": 0.02, |
|
"learning_rate": 1.214311528731478e-06, |
|
"loss": 14.4649, |
|
"step": 60, |
|
"task_loss": 5.95428466796875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.689329147338867, |
|
"epoch": 0.03, |
|
"learning_rate": 1.4311528731478136e-06, |
|
"loss": 14.1966, |
|
"step": 70, |
|
"task_loss": 5.9617919921875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.500862121582031, |
|
"epoch": 0.03, |
|
"learning_rate": 1.647994217564149e-06, |
|
"loss": 14.1659, |
|
"step": 80, |
|
"task_loss": 5.9317626953125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.836941719055176, |
|
"epoch": 0.03, |
|
"learning_rate": 1.8648355619804843e-06, |
|
"loss": 14.0619, |
|
"step": 90, |
|
"task_loss": 5.79229736328125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.45178508758545, |
|
"epoch": 0.04, |
|
"learning_rate": 2.08167690639682e-06, |
|
"loss": 13.9798, |
|
"step": 100, |
|
"task_loss": 5.859405517578125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.564159393310547, |
|
"epoch": 0.04, |
|
"learning_rate": 2.298518250813155e-06, |
|
"loss": 14.0794, |
|
"step": 110, |
|
"task_loss": 5.789794921875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 13.552111625671387, |
|
"epoch": 0.04, |
|
"learning_rate": 2.5153595952294906e-06, |
|
"loss": 13.8458, |
|
"step": 120, |
|
"task_loss": 5.688568115234375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.749038696289062, |
|
"epoch": 0.05, |
|
"learning_rate": 2.732200939645826e-06, |
|
"loss": 13.8899, |
|
"step": 130, |
|
"task_loss": 5.49835205078125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.9246826171875, |
|
"epoch": 0.05, |
|
"learning_rate": 2.9490422840621613e-06, |
|
"loss": 13.7831, |
|
"step": 140, |
|
"task_loss": 5.456787109375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.531848907470703, |
|
"epoch": 0.05, |
|
"learning_rate": 3.1658836284784965e-06, |
|
"loss": 13.4898, |
|
"step": 150, |
|
"task_loss": 5.613800048828125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 15.012535095214844, |
|
"epoch": 0.06, |
|
"learning_rate": 3.382724972894832e-06, |
|
"loss": 13.6949, |
|
"step": 160, |
|
"task_loss": 5.2825927734375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.039583206176758, |
|
"epoch": 0.06, |
|
"learning_rate": 3.5995663173111676e-06, |
|
"loss": 13.182, |
|
"step": 170, |
|
"task_loss": 5.353515625 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 13.0057373046875, |
|
"epoch": 0.07, |
|
"learning_rate": 3.816407661727503e-06, |
|
"loss": 12.6947, |
|
"step": 180, |
|
"task_loss": 5.157745361328125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 14.364778518676758, |
|
"epoch": 0.07, |
|
"learning_rate": 4.033249006143838e-06, |
|
"loss": 12.8377, |
|
"step": 190, |
|
"task_loss": 5.151580810546875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 12.603987693786621, |
|
"epoch": 0.07, |
|
"learning_rate": 4.2500903505601735e-06, |
|
"loss": 12.5114, |
|
"step": 200, |
|
"task_loss": 5.133880615234375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 13.770363807678223, |
|
"epoch": 0.08, |
|
"learning_rate": 4.466931694976509e-06, |
|
"loss": 12.571, |
|
"step": 210, |
|
"task_loss": 4.792236328125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 12.380254745483398, |
|
"epoch": 0.08, |
|
"learning_rate": 4.683773039392844e-06, |
|
"loss": 12.1934, |
|
"step": 220, |
|
"task_loss": 4.9013671875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 12.504887580871582, |
|
"epoch": 0.08, |
|
"learning_rate": 4.900614383809179e-06, |
|
"loss": 12.2029, |
|
"step": 230, |
|
"task_loss": 4.988494873046875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 12.855850219726562, |
|
"epoch": 0.09, |
|
"learning_rate": 5.117455728225516e-06, |
|
"loss": 11.5489, |
|
"step": 240, |
|
"task_loss": 4.76690673828125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 11.811483383178711, |
|
"epoch": 0.09, |
|
"learning_rate": 5.3342970726418505e-06, |
|
"loss": 11.297, |
|
"step": 250, |
|
"task_loss": 4.54803466796875 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_exact_match": 2.7341532639545885, |
|
"eval_f1": 12.629112049670779, |
|
"step": 250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 11.374210357666016, |
|
"epoch": 0.09, |
|
"learning_rate": 5.551138417058186e-06, |
|
"loss": 10.9207, |
|
"step": 260, |
|
"task_loss": 4.5309906005859375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 10.759222030639648, |
|
"epoch": 0.1, |
|
"learning_rate": 5.767979761474522e-06, |
|
"loss": 10.4359, |
|
"step": 270, |
|
"task_loss": 4.0685882568359375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 10.636564254760742, |
|
"epoch": 0.1, |
|
"learning_rate": 5.984821105890856e-06, |
|
"loss": 9.9935, |
|
"step": 280, |
|
"task_loss": 4.374786376953125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 10.036893844604492, |
|
"epoch": 0.1, |
|
"learning_rate": 6.201662450307192e-06, |
|
"loss": 9.4053, |
|
"step": 290, |
|
"task_loss": 3.8459930419921875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 9.233875274658203, |
|
"epoch": 0.11, |
|
"learning_rate": 6.4185037947235275e-06, |
|
"loss": 9.0732, |
|
"step": 300, |
|
"task_loss": 3.669677734375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 8.439445495605469, |
|
"epoch": 0.11, |
|
"learning_rate": 6.635345139139862e-06, |
|
"loss": 8.6626, |
|
"step": 310, |
|
"task_loss": 3.1441650390625 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 8.574735641479492, |
|
"epoch": 0.12, |
|
"learning_rate": 6.852186483556198e-06, |
|
"loss": 8.297, |
|
"step": 320, |
|
"task_loss": 3.4480514526367188 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 8.304073333740234, |
|
"epoch": 0.12, |
|
"learning_rate": 7.069027827972534e-06, |
|
"loss": 8.0954, |
|
"step": 330, |
|
"task_loss": 3.2958412170410156 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 7.721191883087158, |
|
"epoch": 0.12, |
|
"learning_rate": 7.285869172388869e-06, |
|
"loss": 7.3317, |
|
"step": 340, |
|
"task_loss": 3.191631317138672 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 7.107563495635986, |
|
"epoch": 0.13, |
|
"learning_rate": 7.5027105168052046e-06, |
|
"loss": 7.0177, |
|
"step": 350, |
|
"task_loss": 3.102968215942383 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 7.762242794036865, |
|
"epoch": 0.13, |
|
"learning_rate": 7.71955186122154e-06, |
|
"loss": 6.7813, |
|
"step": 360, |
|
"task_loss": 4.048648834228516 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 6.884188652038574, |
|
"epoch": 0.13, |
|
"learning_rate": 7.936393205637875e-06, |
|
"loss": 6.6125, |
|
"step": 370, |
|
"task_loss": 2.772972583770752 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 6.817141056060791, |
|
"epoch": 0.14, |
|
"learning_rate": 8.15323455005421e-06, |
|
"loss": 6.1752, |
|
"step": 380, |
|
"task_loss": 3.29545259475708 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 6.7478532791137695, |
|
"epoch": 0.14, |
|
"learning_rate": 8.370075894470546e-06, |
|
"loss": 5.9283, |
|
"step": 390, |
|
"task_loss": 2.940948486328125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 6.400359153747559, |
|
"epoch": 0.14, |
|
"learning_rate": 8.586917238886882e-06, |
|
"loss": 5.7384, |
|
"step": 400, |
|
"task_loss": 3.4309067726135254 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.582571983337402, |
|
"epoch": 0.15, |
|
"learning_rate": 8.803758583303217e-06, |
|
"loss": 5.6473, |
|
"step": 410, |
|
"task_loss": 2.701523780822754 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.448908805847168, |
|
"epoch": 0.15, |
|
"learning_rate": 9.020599927719551e-06, |
|
"loss": 5.7486, |
|
"step": 420, |
|
"task_loss": 3.1738014221191406 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 6.087796211242676, |
|
"epoch": 0.16, |
|
"learning_rate": 9.237441272135887e-06, |
|
"loss": 5.4961, |
|
"step": 430, |
|
"task_loss": 3.124908447265625 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.375993728637695, |
|
"epoch": 0.16, |
|
"learning_rate": 9.454282616552222e-06, |
|
"loss": 5.2309, |
|
"step": 440, |
|
"task_loss": 2.6864736080169678 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.898089408874512, |
|
"epoch": 0.16, |
|
"learning_rate": 9.671123960968558e-06, |
|
"loss": 5.3107, |
|
"step": 450, |
|
"task_loss": 2.6333131790161133 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.690750598907471, |
|
"epoch": 0.17, |
|
"learning_rate": 9.887965305384893e-06, |
|
"loss": 5.0992, |
|
"step": 460, |
|
"task_loss": 2.886949062347412 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.3047871589660645, |
|
"epoch": 0.17, |
|
"learning_rate": 1.010480664980123e-05, |
|
"loss": 5.1496, |
|
"step": 470, |
|
"task_loss": 2.6398158073425293 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.096426010131836, |
|
"epoch": 0.17, |
|
"learning_rate": 1.0321647994217564e-05, |
|
"loss": 4.888, |
|
"step": 480, |
|
"task_loss": 3.002800941467285 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.322364807128906, |
|
"epoch": 0.18, |
|
"learning_rate": 1.05384893386339e-05, |
|
"loss": 4.926, |
|
"step": 490, |
|
"task_loss": 2.943061590194702 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.079683303833008, |
|
"epoch": 0.18, |
|
"learning_rate": 1.0755330683050236e-05, |
|
"loss": 4.7843, |
|
"step": 500, |
|
"task_loss": 2.4856796264648438 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_exact_match": 47.6631977294229, |
|
"eval_f1": 61.84689508471956, |
|
"step": 500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.29311466217041, |
|
"epoch": 0.18, |
|
"learning_rate": 1.0972172027466571e-05, |
|
"loss": 4.6627, |
|
"step": 510, |
|
"task_loss": 2.2312231063842773 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.374029159545898, |
|
"epoch": 0.19, |
|
"learning_rate": 1.1189013371882907e-05, |
|
"loss": 4.7554, |
|
"step": 520, |
|
"task_loss": 1.571474313735962 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 5.325689315795898, |
|
"epoch": 0.19, |
|
"learning_rate": 1.1405854716299242e-05, |
|
"loss": 4.5999, |
|
"step": 530, |
|
"task_loss": 2.791367530822754 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.6214494705200195, |
|
"epoch": 0.2, |
|
"learning_rate": 1.1622696060715576e-05, |
|
"loss": 4.3316, |
|
"step": 540, |
|
"task_loss": 2.6126604080200195 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.899660110473633, |
|
"epoch": 0.2, |
|
"learning_rate": 1.1839537405131912e-05, |
|
"loss": 4.5563, |
|
"step": 550, |
|
"task_loss": 2.572834014892578 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.496711730957031, |
|
"epoch": 0.2, |
|
"learning_rate": 1.2056378749548247e-05, |
|
"loss": 4.277, |
|
"step": 560, |
|
"task_loss": 2.6678249835968018 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.555987358093262, |
|
"epoch": 0.21, |
|
"learning_rate": 1.2273220093964583e-05, |
|
"loss": 4.3194, |
|
"step": 570, |
|
"task_loss": 2.6166036128997803 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.9610090255737305, |
|
"epoch": 0.21, |
|
"learning_rate": 1.2490061438380918e-05, |
|
"loss": 4.2434, |
|
"step": 580, |
|
"task_loss": 2.6043477058410645 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.213617324829102, |
|
"epoch": 0.21, |
|
"learning_rate": 1.2706902782797254e-05, |
|
"loss": 4.273, |
|
"step": 590, |
|
"task_loss": 1.856445550918579 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.79631233215332, |
|
"epoch": 0.22, |
|
"learning_rate": 1.2923744127213588e-05, |
|
"loss": 4.3605, |
|
"step": 600, |
|
"task_loss": 2.3547120094299316 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.130844593048096, |
|
"epoch": 0.22, |
|
"learning_rate": 1.3140585471629924e-05, |
|
"loss": 4.1307, |
|
"step": 610, |
|
"task_loss": 2.737905979156494 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.68433952331543, |
|
"epoch": 0.22, |
|
"learning_rate": 1.3357426816046259e-05, |
|
"loss": 4.228, |
|
"step": 620, |
|
"task_loss": 2.0046231746673584 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.9693734645843506, |
|
"epoch": 0.23, |
|
"learning_rate": 1.3574268160462595e-05, |
|
"loss": 4.0317, |
|
"step": 630, |
|
"task_loss": 1.786435604095459 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.474964618682861, |
|
"epoch": 0.23, |
|
"learning_rate": 1.379110950487893e-05, |
|
"loss": 3.9845, |
|
"step": 640, |
|
"task_loss": 2.3881354331970215 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.220601558685303, |
|
"epoch": 0.23, |
|
"learning_rate": 1.4007950849295268e-05, |
|
"loss": 4.126, |
|
"step": 650, |
|
"task_loss": 2.3482580184936523 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.762381553649902, |
|
"epoch": 0.24, |
|
"learning_rate": 1.4224792193711601e-05, |
|
"loss": 3.9768, |
|
"step": 660, |
|
"task_loss": 2.4734535217285156 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.62750244140625, |
|
"epoch": 0.24, |
|
"learning_rate": 1.4441633538127937e-05, |
|
"loss": 3.7916, |
|
"step": 670, |
|
"task_loss": 1.8247270584106445 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.762209415435791, |
|
"epoch": 0.25, |
|
"learning_rate": 1.4658474882544273e-05, |
|
"loss": 3.7097, |
|
"step": 680, |
|
"task_loss": 1.700040578842163 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.5018386840820312, |
|
"epoch": 0.25, |
|
"learning_rate": 1.4875316226960608e-05, |
|
"loss": 3.5434, |
|
"step": 690, |
|
"task_loss": 1.8874092102050781 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.7156856060028076, |
|
"epoch": 0.25, |
|
"learning_rate": 1.5092157571376944e-05, |
|
"loss": 3.6905, |
|
"step": 700, |
|
"task_loss": 1.9769742488861084 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.2249040603637695, |
|
"epoch": 0.26, |
|
"learning_rate": 1.530899891579328e-05, |
|
"loss": 3.6165, |
|
"step": 710, |
|
"task_loss": 1.8299891948699951 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.8128280639648438, |
|
"epoch": 0.26, |
|
"learning_rate": 1.5525840260209615e-05, |
|
"loss": 3.6727, |
|
"step": 720, |
|
"task_loss": 1.8565778732299805 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.894437789916992, |
|
"epoch": 0.26, |
|
"learning_rate": 1.574268160462595e-05, |
|
"loss": 3.8419, |
|
"step": 730, |
|
"task_loss": 1.9490731954574585 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.6588613986968994, |
|
"epoch": 0.27, |
|
"learning_rate": 1.5959522949042286e-05, |
|
"loss": 3.719, |
|
"step": 740, |
|
"task_loss": 2.055631399154663 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.815298557281494, |
|
"epoch": 0.27, |
|
"learning_rate": 1.6176364293458618e-05, |
|
"loss": 3.5813, |
|
"step": 750, |
|
"task_loss": 1.935587763786316 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_exact_match": 58.55250709555345, |
|
"eval_f1": 70.94607775185224, |
|
"step": 750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.76840877532959, |
|
"epoch": 0.27, |
|
"learning_rate": 1.6393205637874954e-05, |
|
"loss": 3.5721, |
|
"step": 760, |
|
"task_loss": 2.5942468643188477 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.8197884559631348, |
|
"epoch": 0.28, |
|
"learning_rate": 1.661004698229129e-05, |
|
"loss": 3.406, |
|
"step": 770, |
|
"task_loss": 2.399030923843384 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.773465633392334, |
|
"epoch": 0.28, |
|
"learning_rate": 1.6826888326707625e-05, |
|
"loss": 3.6862, |
|
"step": 780, |
|
"task_loss": 2.0786447525024414 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.5512259006500244, |
|
"epoch": 0.29, |
|
"learning_rate": 1.704372967112396e-05, |
|
"loss": 3.4023, |
|
"step": 790, |
|
"task_loss": 1.7031829357147217 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.154162406921387, |
|
"epoch": 0.29, |
|
"learning_rate": 1.7260571015540296e-05, |
|
"loss": 3.5814, |
|
"step": 800, |
|
"task_loss": 2.033867597579956 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.1742305755615234, |
|
"epoch": 0.29, |
|
"learning_rate": 1.747741235995663e-05, |
|
"loss": 3.5524, |
|
"step": 810, |
|
"task_loss": 1.7235503196716309 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.667600154876709, |
|
"epoch": 0.3, |
|
"learning_rate": 1.7694253704372967e-05, |
|
"loss": 3.1374, |
|
"step": 820, |
|
"task_loss": 2.0208334922790527 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.541199207305908, |
|
"epoch": 0.3, |
|
"learning_rate": 1.7911095048789303e-05, |
|
"loss": 3.5502, |
|
"step": 830, |
|
"task_loss": 2.0909945964813232 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.158015012741089, |
|
"epoch": 0.3, |
|
"learning_rate": 1.812793639320564e-05, |
|
"loss": 3.1089, |
|
"step": 840, |
|
"task_loss": 1.5247281789779663 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.0832254886627197, |
|
"epoch": 0.31, |
|
"learning_rate": 1.8344777737621974e-05, |
|
"loss": 3.3842, |
|
"step": 850, |
|
"task_loss": 1.5567718744277954 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.231353521347046, |
|
"epoch": 0.31, |
|
"learning_rate": 1.856161908203831e-05, |
|
"loss": 3.2243, |
|
"step": 860, |
|
"task_loss": 2.2011852264404297 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.409111261367798, |
|
"epoch": 0.31, |
|
"learning_rate": 1.8778460426454645e-05, |
|
"loss": 3.1738, |
|
"step": 870, |
|
"task_loss": 1.6380929946899414 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.968132495880127, |
|
"epoch": 0.32, |
|
"learning_rate": 1.8995301770870977e-05, |
|
"loss": 3.184, |
|
"step": 880, |
|
"task_loss": 1.0741721391677856 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.7900476455688477, |
|
"epoch": 0.32, |
|
"learning_rate": 1.9212143115287313e-05, |
|
"loss": 3.3852, |
|
"step": 890, |
|
"task_loss": 2.300794839859009 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.6072299480438232, |
|
"epoch": 0.33, |
|
"learning_rate": 1.942898445970365e-05, |
|
"loss": 3.0648, |
|
"step": 900, |
|
"task_loss": 1.9764083623886108 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.560149908065796, |
|
"epoch": 0.33, |
|
"learning_rate": 1.9645825804119984e-05, |
|
"loss": 3.1941, |
|
"step": 910, |
|
"task_loss": 2.037846088409424 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.539365768432617, |
|
"epoch": 0.33, |
|
"learning_rate": 1.986266714853632e-05, |
|
"loss": 3.1535, |
|
"step": 920, |
|
"task_loss": 1.5650080442428589 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.952303409576416, |
|
"epoch": 0.34, |
|
"learning_rate": 2.007950849295266e-05, |
|
"loss": 3.0291, |
|
"step": 930, |
|
"task_loss": 2.1716508865356445 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 4.5406928062438965, |
|
"epoch": 0.34, |
|
"learning_rate": 2.0296349837368994e-05, |
|
"loss": 3.2595, |
|
"step": 940, |
|
"task_loss": 2.8979098796844482 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.1165611743927, |
|
"epoch": 0.34, |
|
"learning_rate": 2.051319118178533e-05, |
|
"loss": 3.0601, |
|
"step": 950, |
|
"task_loss": 1.5383003950119019 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.121710777282715, |
|
"epoch": 0.35, |
|
"learning_rate": 2.0730032526201665e-05, |
|
"loss": 3.1225, |
|
"step": 960, |
|
"task_loss": 1.6968241930007935 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.3333802223205566, |
|
"epoch": 0.35, |
|
"learning_rate": 2.0946873870618e-05, |
|
"loss": 3.0869, |
|
"step": 970, |
|
"task_loss": 2.1477713584899902 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.538954257965088, |
|
"epoch": 0.35, |
|
"learning_rate": 2.1163715215034336e-05, |
|
"loss": 2.8604, |
|
"step": 980, |
|
"task_loss": 1.5192737579345703 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.781994581222534, |
|
"epoch": 0.36, |
|
"learning_rate": 2.1380556559450672e-05, |
|
"loss": 2.6585, |
|
"step": 990, |
|
"task_loss": 1.9368581771850586 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.892576217651367, |
|
"epoch": 0.36, |
|
"learning_rate": 2.1597397903867004e-05, |
|
"loss": 3.2002, |
|
"step": 1000, |
|
"task_loss": 2.37716007232666 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_exact_match": 64.34247871333964, |
|
"eval_f1": 76.12832427154721, |
|
"step": 1000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.0241000652313232, |
|
"epoch": 0.37, |
|
"learning_rate": 2.181423924828334e-05, |
|
"loss": 3.118, |
|
"step": 1010, |
|
"task_loss": 1.4347963333129883 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.0104122161865234, |
|
"epoch": 0.37, |
|
"learning_rate": 2.2031080592699675e-05, |
|
"loss": 2.8552, |
|
"step": 1020, |
|
"task_loss": 1.7555286884307861 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.568011999130249, |
|
"epoch": 0.37, |
|
"learning_rate": 2.224792193711601e-05, |
|
"loss": 2.918, |
|
"step": 1030, |
|
"task_loss": 1.1540263891220093 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.7906365394592285, |
|
"epoch": 0.38, |
|
"learning_rate": 2.2464763281532346e-05, |
|
"loss": 3.0068, |
|
"step": 1040, |
|
"task_loss": 2.4095299243927 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.682779312133789, |
|
"epoch": 0.38, |
|
"learning_rate": 2.2681604625948682e-05, |
|
"loss": 2.6718, |
|
"step": 1050, |
|
"task_loss": 1.969969391822815 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.5220847129821777, |
|
"epoch": 0.38, |
|
"learning_rate": 2.2898445970365018e-05, |
|
"loss": 2.6947, |
|
"step": 1060, |
|
"task_loss": 1.0111178159713745 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.6033337116241455, |
|
"epoch": 0.39, |
|
"learning_rate": 2.3115287314781353e-05, |
|
"loss": 2.8889, |
|
"step": 1070, |
|
"task_loss": 1.8982610702514648 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.147583484649658, |
|
"epoch": 0.39, |
|
"learning_rate": 2.333212865919769e-05, |
|
"loss": 2.8495, |
|
"step": 1080, |
|
"task_loss": 1.5683348178863525 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.5625667572021484, |
|
"epoch": 0.39, |
|
"learning_rate": 2.3548970003614024e-05, |
|
"loss": 2.9222, |
|
"step": 1090, |
|
"task_loss": 1.891990065574646 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2590126991271973, |
|
"epoch": 0.4, |
|
"learning_rate": 2.376581134803036e-05, |
|
"loss": 2.8199, |
|
"step": 1100, |
|
"task_loss": 1.4266440868377686 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.4584269523620605, |
|
"epoch": 0.4, |
|
"learning_rate": 2.3982652692446695e-05, |
|
"loss": 2.856, |
|
"step": 1110, |
|
"task_loss": 1.945828914642334 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.8447415828704834, |
|
"epoch": 0.4, |
|
"learning_rate": 2.4199494036863028e-05, |
|
"loss": 2.5436, |
|
"step": 1120, |
|
"task_loss": 1.3307209014892578 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.670109510421753, |
|
"epoch": 0.41, |
|
"learning_rate": 2.4416335381279363e-05, |
|
"loss": 2.7268, |
|
"step": 1130, |
|
"task_loss": 1.9327623844146729 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.741763114929199, |
|
"epoch": 0.41, |
|
"learning_rate": 2.46331767256957e-05, |
|
"loss": 2.8081, |
|
"step": 1140, |
|
"task_loss": 1.631276249885559 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.837188482284546, |
|
"epoch": 0.42, |
|
"learning_rate": 2.4850018070112034e-05, |
|
"loss": 2.6292, |
|
"step": 1150, |
|
"task_loss": 1.422114372253418 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.2892446517944336, |
|
"epoch": 0.42, |
|
"learning_rate": 2.506685941452837e-05, |
|
"loss": 2.7805, |
|
"step": 1160, |
|
"task_loss": 1.5163651704788208 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.5814714431762695, |
|
"epoch": 0.42, |
|
"learning_rate": 2.5283700758944705e-05, |
|
"loss": 2.7588, |
|
"step": 1170, |
|
"task_loss": 1.4490866661071777 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.4073421955108643, |
|
"epoch": 0.43, |
|
"learning_rate": 2.550054210336104e-05, |
|
"loss": 2.5651, |
|
"step": 1180, |
|
"task_loss": 1.9651927947998047 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1154234409332275, |
|
"epoch": 0.43, |
|
"learning_rate": 2.5717383447777377e-05, |
|
"loss": 2.5451, |
|
"step": 1190, |
|
"task_loss": 1.5143002271652222 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.7987661361694336, |
|
"epoch": 0.43, |
|
"learning_rate": 2.5934224792193712e-05, |
|
"loss": 2.6821, |
|
"step": 1200, |
|
"task_loss": 1.6177781820297241 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1852757930755615, |
|
"epoch": 0.44, |
|
"learning_rate": 2.6151066136610048e-05, |
|
"loss": 2.4611, |
|
"step": 1210, |
|
"task_loss": 1.2211247682571411 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.9138824939727783, |
|
"epoch": 0.44, |
|
"learning_rate": 2.6367907481026383e-05, |
|
"loss": 2.4774, |
|
"step": 1220, |
|
"task_loss": 1.7106552124023438 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2626287937164307, |
|
"epoch": 0.44, |
|
"learning_rate": 2.658474882544272e-05, |
|
"loss": 2.5318, |
|
"step": 1230, |
|
"task_loss": 1.307836890220642 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.6250033378601074, |
|
"epoch": 0.45, |
|
"learning_rate": 2.680159016985905e-05, |
|
"loss": 2.5726, |
|
"step": 1240, |
|
"task_loss": 1.559349536895752 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.5571208000183105, |
|
"epoch": 0.45, |
|
"learning_rate": 2.7018431514275387e-05, |
|
"loss": 2.489, |
|
"step": 1250, |
|
"task_loss": 1.1737449169158936 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_exact_match": 67.50236518448439, |
|
"eval_f1": 78.53072236477003, |
|
"step": 1250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.1527514457702637, |
|
"epoch": 0.46, |
|
"learning_rate": 2.7235272858691722e-05, |
|
"loss": 2.6094, |
|
"step": 1260, |
|
"task_loss": 1.478395700454712 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.001819610595703, |
|
"epoch": 0.46, |
|
"learning_rate": 2.7452114203108058e-05, |
|
"loss": 2.7699, |
|
"step": 1270, |
|
"task_loss": 1.6132569313049316 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.6055197715759277, |
|
"epoch": 0.46, |
|
"learning_rate": 2.7668955547524393e-05, |
|
"loss": 2.4624, |
|
"step": 1280, |
|
"task_loss": 1.5703206062316895 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.221977472305298, |
|
"epoch": 0.47, |
|
"learning_rate": 2.788579689194073e-05, |
|
"loss": 2.4796, |
|
"step": 1290, |
|
"task_loss": 1.2309448719024658 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.17022705078125, |
|
"epoch": 0.47, |
|
"learning_rate": 2.8102638236357068e-05, |
|
"loss": 2.4461, |
|
"step": 1300, |
|
"task_loss": 1.4810001850128174 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.275075912475586, |
|
"epoch": 0.47, |
|
"learning_rate": 2.8319479580773403e-05, |
|
"loss": 2.6478, |
|
"step": 1310, |
|
"task_loss": 2.2076168060302734 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1733217239379883, |
|
"epoch": 0.48, |
|
"learning_rate": 2.853632092518974e-05, |
|
"loss": 2.3909, |
|
"step": 1320, |
|
"task_loss": 1.4496859312057495 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3826065063476562, |
|
"epoch": 0.48, |
|
"learning_rate": 2.8753162269606075e-05, |
|
"loss": 2.3423, |
|
"step": 1330, |
|
"task_loss": 1.4332823753356934 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.4819350242614746, |
|
"epoch": 0.48, |
|
"learning_rate": 2.897000361402241e-05, |
|
"loss": 2.3712, |
|
"step": 1340, |
|
"task_loss": 1.7626259326934814 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.2148211002349854, |
|
"epoch": 0.49, |
|
"learning_rate": 2.9186844958438746e-05, |
|
"loss": 2.3867, |
|
"step": 1350, |
|
"task_loss": 1.9342608451843262 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.4345703125, |
|
"epoch": 0.49, |
|
"learning_rate": 2.9403686302855078e-05, |
|
"loss": 2.3923, |
|
"step": 1360, |
|
"task_loss": 1.5897960662841797 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2300848960876465, |
|
"epoch": 0.5, |
|
"learning_rate": 2.9620527647271414e-05, |
|
"loss": 2.225, |
|
"step": 1370, |
|
"task_loss": 1.2050740718841553 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.686323881149292, |
|
"epoch": 0.5, |
|
"learning_rate": 2.983736899168775e-05, |
|
"loss": 2.3038, |
|
"step": 1380, |
|
"task_loss": 1.9944977760314941 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.4140007495880127, |
|
"epoch": 0.5, |
|
"learning_rate": 3.005421033610408e-05, |
|
"loss": 2.3103, |
|
"step": 1390, |
|
"task_loss": 1.4956495761871338 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1493027210235596, |
|
"epoch": 0.51, |
|
"learning_rate": 3.027105168052042e-05, |
|
"loss": 2.278, |
|
"step": 1400, |
|
"task_loss": 1.2530300617218018 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3148369789123535, |
|
"epoch": 0.51, |
|
"learning_rate": 3.0487893024936752e-05, |
|
"loss": 2.0819, |
|
"step": 1410, |
|
"task_loss": 1.466301441192627 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3211307525634766, |
|
"epoch": 0.51, |
|
"learning_rate": 3.070473436935309e-05, |
|
"loss": 2.2652, |
|
"step": 1420, |
|
"task_loss": 1.6483142375946045 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.6457409858703613, |
|
"epoch": 0.52, |
|
"learning_rate": 3.092157571376942e-05, |
|
"loss": 2.425, |
|
"step": 1430, |
|
"task_loss": 1.5462552309036255 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.4564802646636963, |
|
"epoch": 0.52, |
|
"learning_rate": 3.113841705818576e-05, |
|
"loss": 2.2011, |
|
"step": 1440, |
|
"task_loss": 1.7669343948364258 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.213484764099121, |
|
"epoch": 0.52, |
|
"learning_rate": 3.135525840260209e-05, |
|
"loss": 2.3324, |
|
"step": 1450, |
|
"task_loss": 1.2010239362716675 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1059863567352295, |
|
"epoch": 0.53, |
|
"learning_rate": 3.1572099747018434e-05, |
|
"loss": 2.3631, |
|
"step": 1460, |
|
"task_loss": 1.0316790342330933 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.007906913757324, |
|
"epoch": 0.53, |
|
"learning_rate": 3.178894109143476e-05, |
|
"loss": 2.2032, |
|
"step": 1470, |
|
"task_loss": 1.1084401607513428 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.6057040691375732, |
|
"epoch": 0.53, |
|
"learning_rate": 3.2005782435851105e-05, |
|
"loss": 2.3162, |
|
"step": 1480, |
|
"task_loss": 1.4449225664138794 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.419894218444824, |
|
"epoch": 0.54, |
|
"learning_rate": 3.222262378026744e-05, |
|
"loss": 2.1313, |
|
"step": 1490, |
|
"task_loss": 1.390395164489746 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.433657646179199, |
|
"epoch": 0.54, |
|
"learning_rate": 3.2439465124683776e-05, |
|
"loss": 2.3071, |
|
"step": 1500, |
|
"task_loss": 1.726881504058838 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_exact_match": 70.34058656575213, |
|
"eval_f1": 81.00798952513871, |
|
"step": 1500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.9914134740829468, |
|
"epoch": 0.55, |
|
"learning_rate": 3.265630646910011e-05, |
|
"loss": 2.4203, |
|
"step": 1510, |
|
"task_loss": 1.3354167938232422 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2558791637420654, |
|
"epoch": 0.55, |
|
"learning_rate": 3.287314781351645e-05, |
|
"loss": 2.3477, |
|
"step": 1520, |
|
"task_loss": 1.4666078090667725 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.4174277782440186, |
|
"epoch": 0.55, |
|
"learning_rate": 3.308998915793278e-05, |
|
"loss": 2.226, |
|
"step": 1530, |
|
"task_loss": 1.690213918685913 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.663379430770874, |
|
"epoch": 0.56, |
|
"learning_rate": 3.330683050234912e-05, |
|
"loss": 2.2701, |
|
"step": 1540, |
|
"task_loss": 1.8850646018981934 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.476119041442871, |
|
"epoch": 0.56, |
|
"learning_rate": 3.3523671846765454e-05, |
|
"loss": 2.2315, |
|
"step": 1550, |
|
"task_loss": 1.7790265083312988 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.5638389587402344, |
|
"epoch": 0.56, |
|
"learning_rate": 3.374051319118178e-05, |
|
"loss": 2.2065, |
|
"step": 1560, |
|
"task_loss": 2.4445137977600098 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.102419376373291, |
|
"epoch": 0.57, |
|
"learning_rate": 3.3957354535598125e-05, |
|
"loss": 2.0694, |
|
"step": 1570, |
|
"task_loss": 0.849961519241333 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.377769947052002, |
|
"epoch": 0.57, |
|
"learning_rate": 3.4174195880014454e-05, |
|
"loss": 2.3494, |
|
"step": 1580, |
|
"task_loss": 1.6015727519989014 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.992417812347412, |
|
"epoch": 0.57, |
|
"learning_rate": 3.4391037224430796e-05, |
|
"loss": 2.2862, |
|
"step": 1590, |
|
"task_loss": 1.7169113159179688 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1633100509643555, |
|
"epoch": 0.58, |
|
"learning_rate": 3.4607878568847125e-05, |
|
"loss": 2.2645, |
|
"step": 1600, |
|
"task_loss": 1.168750524520874 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.846243977546692, |
|
"epoch": 0.58, |
|
"learning_rate": 3.482471991326347e-05, |
|
"loss": 2.1987, |
|
"step": 1610, |
|
"task_loss": 1.0060114860534668 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.866645097732544, |
|
"epoch": 0.59, |
|
"learning_rate": 3.5041561257679796e-05, |
|
"loss": 2.1537, |
|
"step": 1620, |
|
"task_loss": 1.2443784475326538 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.573518753051758, |
|
"epoch": 0.59, |
|
"learning_rate": 3.525840260209614e-05, |
|
"loss": 2.126, |
|
"step": 1630, |
|
"task_loss": 2.0661087036132812 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.5710911750793457, |
|
"epoch": 0.59, |
|
"learning_rate": 3.547524394651247e-05, |
|
"loss": 2.2293, |
|
"step": 1640, |
|
"task_loss": 2.133748769760132 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.9118821620941162, |
|
"epoch": 0.6, |
|
"learning_rate": 3.569208529092881e-05, |
|
"loss": 2.1774, |
|
"step": 1650, |
|
"task_loss": 0.9966872334480286 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.572476863861084, |
|
"epoch": 0.6, |
|
"learning_rate": 3.590892663534514e-05, |
|
"loss": 2.057, |
|
"step": 1660, |
|
"task_loss": 1.7666373252868652 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.8772165775299072, |
|
"epoch": 0.6, |
|
"learning_rate": 3.6125767979761474e-05, |
|
"loss": 2.1655, |
|
"step": 1670, |
|
"task_loss": 1.931375503540039 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8401541709899902, |
|
"epoch": 0.61, |
|
"learning_rate": 3.634260932417781e-05, |
|
"loss": 1.9505, |
|
"step": 1680, |
|
"task_loss": 1.2190885543823242 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8582175970077515, |
|
"epoch": 0.61, |
|
"learning_rate": 3.6559450668594145e-05, |
|
"loss": 2.076, |
|
"step": 1690, |
|
"task_loss": 1.3368051052093506 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.614755153656006, |
|
"epoch": 0.61, |
|
"learning_rate": 3.677629201301048e-05, |
|
"loss": 2.3466, |
|
"step": 1700, |
|
"task_loss": 1.8578553199768066 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2779784202575684, |
|
"epoch": 0.62, |
|
"learning_rate": 3.6993133357426816e-05, |
|
"loss": 2.1923, |
|
"step": 1710, |
|
"task_loss": 1.5653705596923828 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.77517032623291, |
|
"epoch": 0.62, |
|
"learning_rate": 3.720997470184315e-05, |
|
"loss": 2.0672, |
|
"step": 1720, |
|
"task_loss": 1.7407598495483398 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3437767028808594, |
|
"epoch": 0.63, |
|
"learning_rate": 3.742681604625949e-05, |
|
"loss": 2.2019, |
|
"step": 1730, |
|
"task_loss": 1.8152300119400024 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2300639152526855, |
|
"epoch": 0.63, |
|
"learning_rate": 3.764365739067582e-05, |
|
"loss": 1.9345, |
|
"step": 1740, |
|
"task_loss": 1.5370993614196777 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.157289981842041, |
|
"epoch": 0.63, |
|
"learning_rate": 3.786049873509216e-05, |
|
"loss": 1.8016, |
|
"step": 1750, |
|
"task_loss": 1.4095211029052734 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_exact_match": 71.9205298013245, |
|
"eval_f1": 82.04179007964369, |
|
"step": 1750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6568679809570312, |
|
"epoch": 0.64, |
|
"learning_rate": 3.8077340079508494e-05, |
|
"loss": 2.1419, |
|
"step": 1760, |
|
"task_loss": 1.0038526058197021 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.557297706604004, |
|
"epoch": 0.64, |
|
"learning_rate": 3.829418142392483e-05, |
|
"loss": 1.8646, |
|
"step": 1770, |
|
"task_loss": 1.6112699508666992 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2386903762817383, |
|
"epoch": 0.64, |
|
"learning_rate": 3.8511022768341165e-05, |
|
"loss": 2.1456, |
|
"step": 1780, |
|
"task_loss": 1.4335222244262695 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2769885063171387, |
|
"epoch": 0.65, |
|
"learning_rate": 3.87278641127575e-05, |
|
"loss": 2.0989, |
|
"step": 1790, |
|
"task_loss": 1.8141961097717285 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.8742246627807617, |
|
"epoch": 0.65, |
|
"learning_rate": 3.894470545717383e-05, |
|
"loss": 2.258, |
|
"step": 1800, |
|
"task_loss": 1.5317169427871704 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2876062393188477, |
|
"epoch": 0.65, |
|
"learning_rate": 3.916154680159017e-05, |
|
"loss": 1.9455, |
|
"step": 1810, |
|
"task_loss": 1.7473857402801514 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.066962718963623, |
|
"epoch": 0.66, |
|
"learning_rate": 3.93783881460065e-05, |
|
"loss": 1.8893, |
|
"step": 1820, |
|
"task_loss": 1.0266292095184326 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6544253826141357, |
|
"epoch": 0.66, |
|
"learning_rate": 3.959522949042284e-05, |
|
"loss": 2.0365, |
|
"step": 1830, |
|
"task_loss": 1.080996036529541 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3459901809692383, |
|
"epoch": 0.66, |
|
"learning_rate": 3.981207083483917e-05, |
|
"loss": 1.9773, |
|
"step": 1840, |
|
"task_loss": 1.5108176469802856 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3785009384155273, |
|
"epoch": 0.67, |
|
"learning_rate": 4.0028912179255514e-05, |
|
"loss": 2.0769, |
|
"step": 1850, |
|
"task_loss": 2.11570405960083 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.0890772342681885, |
|
"epoch": 0.67, |
|
"learning_rate": 4.024575352367185e-05, |
|
"loss": 1.9434, |
|
"step": 1860, |
|
"task_loss": 1.0435796976089478 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.71674382686615, |
|
"epoch": 0.68, |
|
"learning_rate": 4.0462594868088185e-05, |
|
"loss": 1.9389, |
|
"step": 1870, |
|
"task_loss": 1.1821942329406738 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.101773500442505, |
|
"epoch": 0.68, |
|
"learning_rate": 4.067943621250452e-05, |
|
"loss": 2.0819, |
|
"step": 1880, |
|
"task_loss": 1.3899006843566895 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.432915687561035, |
|
"epoch": 0.68, |
|
"learning_rate": 4.0896277556920857e-05, |
|
"loss": 1.9742, |
|
"step": 1890, |
|
"task_loss": 1.5817327499389648 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6859402656555176, |
|
"epoch": 0.69, |
|
"learning_rate": 4.111311890133719e-05, |
|
"loss": 2.0378, |
|
"step": 1900, |
|
"task_loss": 0.9370235800743103 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6273126602172852, |
|
"epoch": 0.69, |
|
"learning_rate": 4.132996024575352e-05, |
|
"loss": 1.9215, |
|
"step": 1910, |
|
"task_loss": 1.2056360244750977 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7799351215362549, |
|
"epoch": 0.69, |
|
"learning_rate": 4.154680159016986e-05, |
|
"loss": 1.9668, |
|
"step": 1920, |
|
"task_loss": 0.8631846904754639 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.9689221382141113, |
|
"epoch": 0.7, |
|
"learning_rate": 4.176364293458619e-05, |
|
"loss": 1.9988, |
|
"step": 1930, |
|
"task_loss": 1.3810503482818604 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.826797366142273, |
|
"epoch": 0.7, |
|
"learning_rate": 4.1980484279002534e-05, |
|
"loss": 2.0241, |
|
"step": 1940, |
|
"task_loss": 1.1184316873550415 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.8849117755889893, |
|
"epoch": 0.7, |
|
"learning_rate": 4.219732562341886e-05, |
|
"loss": 2.0848, |
|
"step": 1950, |
|
"task_loss": 1.642095923423767 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6249830722808838, |
|
"epoch": 0.71, |
|
"learning_rate": 4.2414166967835206e-05, |
|
"loss": 1.9932, |
|
"step": 1960, |
|
"task_loss": 1.0573899745941162 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3523120880126953, |
|
"epoch": 0.71, |
|
"learning_rate": 4.2631008312251534e-05, |
|
"loss": 1.7783, |
|
"step": 1970, |
|
"task_loss": 0.67606520652771 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1321749687194824, |
|
"epoch": 0.72, |
|
"learning_rate": 4.284784965666788e-05, |
|
"loss": 1.7797, |
|
"step": 1980, |
|
"task_loss": 1.814510464668274 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4253501892089844, |
|
"epoch": 0.72, |
|
"learning_rate": 4.3064691001084205e-05, |
|
"loss": 1.9431, |
|
"step": 1990, |
|
"task_loss": 0.946668803691864 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3535895347595215, |
|
"epoch": 0.72, |
|
"learning_rate": 4.328153234550055e-05, |
|
"loss": 1.9957, |
|
"step": 2000, |
|
"task_loss": 1.8486053943634033 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_exact_match": 73.1693472090823, |
|
"eval_f1": 83.02894145240745, |
|
"step": 2000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8838860988616943, |
|
"epoch": 0.73, |
|
"learning_rate": 4.349837368991688e-05, |
|
"loss": 1.9315, |
|
"step": 2010, |
|
"task_loss": 1.5416061878204346 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8987743854522705, |
|
"epoch": 0.73, |
|
"learning_rate": 4.371521503433322e-05, |
|
"loss": 1.9744, |
|
"step": 2020, |
|
"task_loss": 1.4941067695617676 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.972581386566162, |
|
"epoch": 0.73, |
|
"learning_rate": 4.393205637874955e-05, |
|
"loss": 1.8702, |
|
"step": 2030, |
|
"task_loss": 1.1873316764831543 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1103854179382324, |
|
"epoch": 0.74, |
|
"learning_rate": 4.414889772316588e-05, |
|
"loss": 1.7989, |
|
"step": 2040, |
|
"task_loss": 1.9999923706054688 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.1972813606262207, |
|
"epoch": 0.74, |
|
"learning_rate": 4.436573906758222e-05, |
|
"loss": 2.0754, |
|
"step": 2050, |
|
"task_loss": 1.3373115062713623 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.598461627960205, |
|
"epoch": 0.74, |
|
"learning_rate": 4.4582580411998554e-05, |
|
"loss": 1.9482, |
|
"step": 2060, |
|
"task_loss": 0.7865440845489502 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3904833793640137, |
|
"epoch": 0.75, |
|
"learning_rate": 4.479942175641489e-05, |
|
"loss": 1.9746, |
|
"step": 2070, |
|
"task_loss": 1.9388031959533691 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6508259773254395, |
|
"epoch": 0.75, |
|
"learning_rate": 4.5016263100831226e-05, |
|
"loss": 1.9099, |
|
"step": 2080, |
|
"task_loss": 1.0399653911590576 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.772383451461792, |
|
"epoch": 0.76, |
|
"learning_rate": 4.523310444524756e-05, |
|
"loss": 1.7396, |
|
"step": 2090, |
|
"task_loss": 0.9913707375526428 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4372773170471191, |
|
"epoch": 0.76, |
|
"learning_rate": 4.54499457896639e-05, |
|
"loss": 1.6997, |
|
"step": 2100, |
|
"task_loss": 0.9053939580917358 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8861255645751953, |
|
"epoch": 0.76, |
|
"learning_rate": 4.566678713408023e-05, |
|
"loss": 1.9588, |
|
"step": 2110, |
|
"task_loss": 1.1382219791412354 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.492857575416565, |
|
"epoch": 0.77, |
|
"learning_rate": 4.588362847849657e-05, |
|
"loss": 2.0162, |
|
"step": 2120, |
|
"task_loss": 1.0419156551361084 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.141202926635742, |
|
"epoch": 0.77, |
|
"learning_rate": 4.6100469822912903e-05, |
|
"loss": 1.9327, |
|
"step": 2130, |
|
"task_loss": 1.0418000221252441 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.920304536819458, |
|
"epoch": 0.77, |
|
"learning_rate": 4.631731116732924e-05, |
|
"loss": 1.8879, |
|
"step": 2140, |
|
"task_loss": 1.1978967189788818 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5354628562927246, |
|
"epoch": 0.78, |
|
"learning_rate": 4.6534152511745575e-05, |
|
"loss": 1.7451, |
|
"step": 2150, |
|
"task_loss": 0.8436750173568726 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4595377445220947, |
|
"epoch": 0.78, |
|
"learning_rate": 4.675099385616191e-05, |
|
"loss": 1.7424, |
|
"step": 2160, |
|
"task_loss": 1.038141131401062 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.34226393699646, |
|
"epoch": 0.78, |
|
"learning_rate": 4.696783520057824e-05, |
|
"loss": 1.7037, |
|
"step": 2170, |
|
"task_loss": 1.9839649200439453 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8102070093154907, |
|
"epoch": 0.79, |
|
"learning_rate": 4.718467654499458e-05, |
|
"loss": 1.7556, |
|
"step": 2180, |
|
"task_loss": 1.7777656316757202 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5364153385162354, |
|
"epoch": 0.79, |
|
"learning_rate": 4.740151788941091e-05, |
|
"loss": 1.8958, |
|
"step": 2190, |
|
"task_loss": 0.9634038805961609 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3901851177215576, |
|
"epoch": 0.8, |
|
"learning_rate": 4.761835923382725e-05, |
|
"loss": 1.8381, |
|
"step": 2200, |
|
"task_loss": 1.4805151224136353 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.725096344947815, |
|
"epoch": 0.8, |
|
"learning_rate": 4.783520057824358e-05, |
|
"loss": 1.6794, |
|
"step": 2210, |
|
"task_loss": 1.4682140350341797 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2090702056884766, |
|
"epoch": 0.8, |
|
"learning_rate": 4.8052041922659924e-05, |
|
"loss": 1.9593, |
|
"step": 2220, |
|
"task_loss": 1.230177402496338 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.041393756866455, |
|
"epoch": 0.81, |
|
"learning_rate": 4.826888326707626e-05, |
|
"loss": 1.809, |
|
"step": 2230, |
|
"task_loss": 1.3010313510894775 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7629666328430176, |
|
"epoch": 0.81, |
|
"learning_rate": 4.8485724611492595e-05, |
|
"loss": 1.9261, |
|
"step": 2240, |
|
"task_loss": 0.9947839379310608 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.264925003051758, |
|
"epoch": 0.81, |
|
"learning_rate": 4.870256595590893e-05, |
|
"loss": 1.902, |
|
"step": 2250, |
|
"task_loss": 1.4164844751358032 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_exact_match": 74.87228003784296, |
|
"eval_f1": 84.24213227343729, |
|
"step": 2250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6021579504013062, |
|
"epoch": 0.82, |
|
"learning_rate": 4.8919407300325266e-05, |
|
"loss": 1.7243, |
|
"step": 2260, |
|
"task_loss": 1.630518913269043 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2044320106506348, |
|
"epoch": 0.82, |
|
"learning_rate": 4.91362486447416e-05, |
|
"loss": 1.7822, |
|
"step": 2270, |
|
"task_loss": 1.8210062980651855 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.79202401638031, |
|
"epoch": 0.82, |
|
"learning_rate": 4.935308998915793e-05, |
|
"loss": 1.7531, |
|
"step": 2280, |
|
"task_loss": 1.189091444015503 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.06649112701416, |
|
"epoch": 0.83, |
|
"learning_rate": 4.956993133357427e-05, |
|
"loss": 1.844, |
|
"step": 2290, |
|
"task_loss": 1.1661924123764038 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.682025671005249, |
|
"epoch": 0.83, |
|
"learning_rate": 4.97867726779906e-05, |
|
"loss": 1.7941, |
|
"step": 2300, |
|
"task_loss": 1.0544310808181763 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3249014616012573, |
|
"epoch": 0.83, |
|
"learning_rate": 5.0003614022406944e-05, |
|
"loss": 1.7689, |
|
"step": 2310, |
|
"task_loss": 0.8618177771568298 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.0047874450683594, |
|
"epoch": 0.84, |
|
"learning_rate": 5.022045536682327e-05, |
|
"loss": 1.6503, |
|
"step": 2320, |
|
"task_loss": 1.8260812759399414 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6946525573730469, |
|
"epoch": 0.84, |
|
"learning_rate": 5.0437296711239615e-05, |
|
"loss": 1.6434, |
|
"step": 2330, |
|
"task_loss": 0.9825450778007507 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3918914794921875, |
|
"epoch": 0.85, |
|
"learning_rate": 5.0654138055655944e-05, |
|
"loss": 1.7978, |
|
"step": 2340, |
|
"task_loss": 1.3643735647201538 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7238832712173462, |
|
"epoch": 0.85, |
|
"learning_rate": 5.0870979400072286e-05, |
|
"loss": 1.7657, |
|
"step": 2350, |
|
"task_loss": 1.165932536125183 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7872267961502075, |
|
"epoch": 0.85, |
|
"learning_rate": 5.1087820744488615e-05, |
|
"loss": 1.7084, |
|
"step": 2360, |
|
"task_loss": 1.5586845874786377 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.0670828819274902, |
|
"epoch": 0.86, |
|
"learning_rate": 5.130466208890496e-05, |
|
"loss": 1.7082, |
|
"step": 2370, |
|
"task_loss": 1.581449270248413 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.378608226776123, |
|
"epoch": 0.86, |
|
"learning_rate": 5.1521503433321286e-05, |
|
"loss": 1.755, |
|
"step": 2380, |
|
"task_loss": 0.980684757232666 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.0133185386657715, |
|
"epoch": 0.86, |
|
"learning_rate": 5.173834477773763e-05, |
|
"loss": 1.8557, |
|
"step": 2390, |
|
"task_loss": 1.6304588317871094 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.729528546333313, |
|
"epoch": 0.87, |
|
"learning_rate": 5.195518612215396e-05, |
|
"loss": 1.7122, |
|
"step": 2400, |
|
"task_loss": 1.540935754776001 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3772521018981934, |
|
"epoch": 0.87, |
|
"learning_rate": 5.217202746657029e-05, |
|
"loss": 1.7996, |
|
"step": 2410, |
|
"task_loss": 0.889678955078125 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8892799615859985, |
|
"epoch": 0.87, |
|
"learning_rate": 5.238886881098663e-05, |
|
"loss": 1.8532, |
|
"step": 2420, |
|
"task_loss": 1.5641158819198608 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5366939306259155, |
|
"epoch": 0.88, |
|
"learning_rate": 5.2605710155402964e-05, |
|
"loss": 1.5529, |
|
"step": 2430, |
|
"task_loss": 0.9511038661003113 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 3.0125789642333984, |
|
"epoch": 0.88, |
|
"learning_rate": 5.28225514998193e-05, |
|
"loss": 1.7891, |
|
"step": 2440, |
|
"task_loss": 2.29156756401062 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.3766469955444336, |
|
"epoch": 0.89, |
|
"learning_rate": 5.3039392844235635e-05, |
|
"loss": 1.6349, |
|
"step": 2450, |
|
"task_loss": 1.3916804790496826 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5166912078857422, |
|
"epoch": 0.89, |
|
"learning_rate": 5.325623418865197e-05, |
|
"loss": 1.7089, |
|
"step": 2460, |
|
"task_loss": 1.1395492553710938 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4228307008743286, |
|
"epoch": 0.89, |
|
"learning_rate": 5.3473075533068306e-05, |
|
"loss": 1.4976, |
|
"step": 2470, |
|
"task_loss": 1.0344961881637573 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.241983413696289, |
|
"epoch": 0.9, |
|
"learning_rate": 5.368991687748464e-05, |
|
"loss": 1.5189, |
|
"step": 2480, |
|
"task_loss": 1.3167787790298462 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.9524130821228027, |
|
"epoch": 0.9, |
|
"learning_rate": 5.390675822190098e-05, |
|
"loss": 1.6949, |
|
"step": 2490, |
|
"task_loss": 1.677308201789856 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7088284492492676, |
|
"epoch": 0.9, |
|
"learning_rate": 5.412359956631731e-05, |
|
"loss": 1.7772, |
|
"step": 2500, |
|
"task_loss": 1.0601410865783691 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_exact_match": 74.76821192052981, |
|
"eval_f1": 84.28227498844669, |
|
"step": 2500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7284023761749268, |
|
"epoch": 0.91, |
|
"learning_rate": 5.434044091073365e-05, |
|
"loss": 1.7942, |
|
"step": 2510, |
|
"task_loss": 1.2665657997131348 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8622101545333862, |
|
"epoch": 0.91, |
|
"learning_rate": 5.455728225514998e-05, |
|
"loss": 1.6878, |
|
"step": 2520, |
|
"task_loss": 1.2940951585769653 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3830608129501343, |
|
"epoch": 0.91, |
|
"learning_rate": 5.477412359956632e-05, |
|
"loss": 1.628, |
|
"step": 2530, |
|
"task_loss": 1.0994198322296143 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.014238119125366, |
|
"epoch": 0.92, |
|
"learning_rate": 5.499096494398265e-05, |
|
"loss": 1.5324, |
|
"step": 2540, |
|
"task_loss": 1.940596103668213 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.274725914001465, |
|
"epoch": 0.92, |
|
"learning_rate": 5.520780628839899e-05, |
|
"loss": 1.6801, |
|
"step": 2550, |
|
"task_loss": 1.9991852045059204 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5515773296356201, |
|
"epoch": 0.93, |
|
"learning_rate": 5.542464763281532e-05, |
|
"loss": 1.649, |
|
"step": 2560, |
|
"task_loss": 1.3755850791931152 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5370211601257324, |
|
"epoch": 0.93, |
|
"learning_rate": 5.564148897723166e-05, |
|
"loss": 1.6046, |
|
"step": 2570, |
|
"task_loss": 1.4268473386764526 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6613398790359497, |
|
"epoch": 0.93, |
|
"learning_rate": 5.585833032164799e-05, |
|
"loss": 1.5996, |
|
"step": 2580, |
|
"task_loss": 0.9327517151832581 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.47848641872406, |
|
"epoch": 0.94, |
|
"learning_rate": 5.607517166606433e-05, |
|
"loss": 1.713, |
|
"step": 2590, |
|
"task_loss": 0.9023648500442505 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7783399820327759, |
|
"epoch": 0.94, |
|
"learning_rate": 5.629201301048067e-05, |
|
"loss": 1.6175, |
|
"step": 2600, |
|
"task_loss": 1.1293476819992065 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.9461766481399536, |
|
"epoch": 0.94, |
|
"learning_rate": 5.6508854354897004e-05, |
|
"loss": 1.6266, |
|
"step": 2610, |
|
"task_loss": 1.0467326641082764 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.567404866218567, |
|
"epoch": 0.95, |
|
"learning_rate": 5.672569569931334e-05, |
|
"loss": 1.8742, |
|
"step": 2620, |
|
"task_loss": 1.0961357355117798 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.381565809249878, |
|
"epoch": 0.95, |
|
"learning_rate": 5.6942537043729675e-05, |
|
"loss": 1.6772, |
|
"step": 2630, |
|
"task_loss": 1.4862456321716309 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3669390678405762, |
|
"epoch": 0.95, |
|
"learning_rate": 5.715937838814601e-05, |
|
"loss": 1.6426, |
|
"step": 2640, |
|
"task_loss": 1.7144615650177002 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4433438777923584, |
|
"epoch": 0.96, |
|
"learning_rate": 5.737621973256234e-05, |
|
"loss": 1.5835, |
|
"step": 2650, |
|
"task_loss": 1.1417357921600342 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4862744808197021, |
|
"epoch": 0.96, |
|
"learning_rate": 5.759306107697868e-05, |
|
"loss": 1.7158, |
|
"step": 2660, |
|
"task_loss": 0.9948188066482544 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.210496187210083, |
|
"epoch": 0.96, |
|
"learning_rate": 5.780990242139501e-05, |
|
"loss": 1.796, |
|
"step": 2670, |
|
"task_loss": 1.136523723602295 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4428620338439941, |
|
"epoch": 0.97, |
|
"learning_rate": 5.802674376581135e-05, |
|
"loss": 1.632, |
|
"step": 2680, |
|
"task_loss": 0.7866153120994568 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.05281662940979, |
|
"epoch": 0.97, |
|
"learning_rate": 5.824358511022768e-05, |
|
"loss": 1.5928, |
|
"step": 2690, |
|
"task_loss": 0.7293530106544495 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4683610200881958, |
|
"epoch": 0.98, |
|
"learning_rate": 5.8460426454644024e-05, |
|
"loss": 1.4844, |
|
"step": 2700, |
|
"task_loss": 0.6756585836410522 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5305559635162354, |
|
"epoch": 0.98, |
|
"learning_rate": 5.867726779906035e-05, |
|
"loss": 1.5709, |
|
"step": 2710, |
|
"task_loss": 1.3757680654525757 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.54231595993042, |
|
"epoch": 0.98, |
|
"learning_rate": 5.8894109143476696e-05, |
|
"loss": 1.5264, |
|
"step": 2720, |
|
"task_loss": 1.2477138042449951 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7188405990600586, |
|
"epoch": 0.99, |
|
"learning_rate": 5.9110950487893024e-05, |
|
"loss": 1.6459, |
|
"step": 2730, |
|
"task_loss": 1.166077971458435 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.8873350620269775, |
|
"epoch": 0.99, |
|
"learning_rate": 5.932779183230937e-05, |
|
"loss": 1.7529, |
|
"step": 2740, |
|
"task_loss": 1.92569899559021 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3297995328903198, |
|
"epoch": 0.99, |
|
"learning_rate": 5.9544633176725695e-05, |
|
"loss": 1.3638, |
|
"step": 2750, |
|
"task_loss": 1.0652023553848267 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_exact_match": 76.26300851466415, |
|
"eval_f1": 85.48165328587461, |
|
"step": 2750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8641085624694824, |
|
"epoch": 1.0, |
|
"learning_rate": 5.976147452114203e-05, |
|
"loss": 1.628, |
|
"step": 2760, |
|
"task_loss": 1.1830246448516846 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2197985649108887, |
|
"epoch": 1.0, |
|
"learning_rate": 5.9978315865558367e-05, |
|
"loss": 1.4132, |
|
"step": 2770, |
|
"task_loss": 1.025228500366211 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6511722803115845, |
|
"epoch": 1.0, |
|
"learning_rate": 5.999995649340457e-05, |
|
"loss": 1.4993, |
|
"step": 2780, |
|
"task_loss": 1.00981605052948 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4045798778533936, |
|
"epoch": 1.01, |
|
"learning_rate": 5.9999806100397186e-05, |
|
"loss": 1.2535, |
|
"step": 2790, |
|
"task_loss": 0.8313103318214417 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8121707439422607, |
|
"epoch": 1.01, |
|
"learning_rate": 5.999954828439778e-05, |
|
"loss": 1.4897, |
|
"step": 2800, |
|
"task_loss": 1.642676830291748 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6657578945159912, |
|
"epoch": 1.02, |
|
"learning_rate": 5.999918304632955e-05, |
|
"loss": 1.5951, |
|
"step": 2810, |
|
"task_loss": 1.2371957302093506 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5906226634979248, |
|
"epoch": 1.02, |
|
"learning_rate": 5.999871038750032e-05, |
|
"loss": 1.4819, |
|
"step": 2820, |
|
"task_loss": 1.2596104145050049 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4319016933441162, |
|
"epoch": 1.02, |
|
"learning_rate": 5.999813030960259e-05, |
|
"loss": 1.4997, |
|
"step": 2830, |
|
"task_loss": 1.103452205657959 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.490473985671997, |
|
"epoch": 1.03, |
|
"learning_rate": 5.99974428147135e-05, |
|
"loss": 1.496, |
|
"step": 2840, |
|
"task_loss": 1.228161096572876 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.737858772277832, |
|
"epoch": 1.03, |
|
"learning_rate": 5.999664790529482e-05, |
|
"loss": 1.3197, |
|
"step": 2850, |
|
"task_loss": 1.2385176420211792 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8279550075531006, |
|
"epoch": 1.03, |
|
"learning_rate": 5.9995745584192954e-05, |
|
"loss": 1.5408, |
|
"step": 2860, |
|
"task_loss": 1.2323086261749268 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.0422871112823486, |
|
"epoch": 1.04, |
|
"learning_rate": 5.9994735854638916e-05, |
|
"loss": 1.5051, |
|
"step": 2870, |
|
"task_loss": 1.5764563083648682 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6473441123962402, |
|
"epoch": 1.04, |
|
"learning_rate": 5.999361872024835e-05, |
|
"loss": 1.431, |
|
"step": 2880, |
|
"task_loss": 1.0629370212554932 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0594135522842407, |
|
"epoch": 1.04, |
|
"learning_rate": 5.9992394185021474e-05, |
|
"loss": 1.3824, |
|
"step": 2890, |
|
"task_loss": 0.8771895170211792 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4817508459091187, |
|
"epoch": 1.05, |
|
"learning_rate": 5.999106225334308e-05, |
|
"loss": 1.3694, |
|
"step": 2900, |
|
"task_loss": 0.8263894319534302 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1033601760864258, |
|
"epoch": 1.05, |
|
"learning_rate": 5.998962292998255e-05, |
|
"loss": 1.3645, |
|
"step": 2910, |
|
"task_loss": 1.4630831480026245 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2878718376159668, |
|
"epoch": 1.06, |
|
"learning_rate": 5.99880762200938e-05, |
|
"loss": 1.4124, |
|
"step": 2920, |
|
"task_loss": 0.641461968421936 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.33697772026062, |
|
"epoch": 1.06, |
|
"learning_rate": 5.9986422129215255e-05, |
|
"loss": 1.5289, |
|
"step": 2930, |
|
"task_loss": 1.9020227193832397 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.129021167755127, |
|
"epoch": 1.06, |
|
"learning_rate": 5.998466066326988e-05, |
|
"loss": 1.4724, |
|
"step": 2940, |
|
"task_loss": 1.0405011177062988 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0331007242202759, |
|
"epoch": 1.07, |
|
"learning_rate": 5.998279182856511e-05, |
|
"loss": 1.3023, |
|
"step": 2950, |
|
"task_loss": 0.7486757040023804 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3960323333740234, |
|
"epoch": 1.07, |
|
"learning_rate": 5.9980815631792844e-05, |
|
"loss": 1.483, |
|
"step": 2960, |
|
"task_loss": 1.3152785301208496 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7268671989440918, |
|
"epoch": 1.07, |
|
"learning_rate": 5.997873208002943e-05, |
|
"loss": 1.4653, |
|
"step": 2970, |
|
"task_loss": 1.341980218887329 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3513455390930176, |
|
"epoch": 1.08, |
|
"learning_rate": 5.997654118073564e-05, |
|
"loss": 1.4756, |
|
"step": 2980, |
|
"task_loss": 1.1631968021392822 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6115731000900269, |
|
"epoch": 1.08, |
|
"learning_rate": 5.99742429417566e-05, |
|
"loss": 1.4344, |
|
"step": 2990, |
|
"task_loss": 0.9157006740570068 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3850129842758179, |
|
"epoch": 1.08, |
|
"learning_rate": 5.997183737132184e-05, |
|
"loss": 1.553, |
|
"step": 3000, |
|
"task_loss": 1.205183744430542 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_exact_match": 75.96972563859981, |
|
"eval_f1": 85.09594063703909, |
|
"step": 3000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6631824970245361, |
|
"epoch": 1.09, |
|
"learning_rate": 5.99693244780452e-05, |
|
"loss": 1.4326, |
|
"step": 3010, |
|
"task_loss": 1.2684969902038574 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7456169128417969, |
|
"epoch": 1.09, |
|
"learning_rate": 5.996670427092481e-05, |
|
"loss": 1.4876, |
|
"step": 3020, |
|
"task_loss": 1.0792250633239746 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8971850872039795, |
|
"epoch": 1.1, |
|
"learning_rate": 5.996397675934309e-05, |
|
"loss": 1.6011, |
|
"step": 3030, |
|
"task_loss": 1.1246172189712524 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.48433518409729, |
|
"epoch": 1.1, |
|
"learning_rate": 5.996114195306668e-05, |
|
"loss": 1.4407, |
|
"step": 3040, |
|
"task_loss": 1.1288623809814453 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5189406871795654, |
|
"epoch": 1.1, |
|
"learning_rate": 5.995819986224643e-05, |
|
"loss": 1.6002, |
|
"step": 3050, |
|
"task_loss": 1.3538494110107422 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7961808443069458, |
|
"epoch": 1.11, |
|
"learning_rate": 5.995515049741734e-05, |
|
"loss": 1.5678, |
|
"step": 3060, |
|
"task_loss": 1.821069598197937 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3772679567337036, |
|
"epoch": 1.11, |
|
"learning_rate": 5.995199386949855e-05, |
|
"loss": 1.4492, |
|
"step": 3070, |
|
"task_loss": 1.3162784576416016 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.128143548965454, |
|
"epoch": 1.11, |
|
"learning_rate": 5.994872998979327e-05, |
|
"loss": 1.4028, |
|
"step": 3080, |
|
"task_loss": 1.0793228149414062 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6217098236083984, |
|
"epoch": 1.12, |
|
"learning_rate": 5.9945358869988796e-05, |
|
"loss": 1.5213, |
|
"step": 3090, |
|
"task_loss": 1.173330307006836 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3088253736495972, |
|
"epoch": 1.12, |
|
"learning_rate": 5.994188052215636e-05, |
|
"loss": 1.3198, |
|
"step": 3100, |
|
"task_loss": 0.9987668395042419 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6880836486816406, |
|
"epoch": 1.12, |
|
"learning_rate": 5.993829495875121e-05, |
|
"loss": 1.3887, |
|
"step": 3110, |
|
"task_loss": 1.2901586294174194 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.024364948272705, |
|
"epoch": 1.13, |
|
"learning_rate": 5.9934602192612506e-05, |
|
"loss": 1.2978, |
|
"step": 3120, |
|
"task_loss": 0.765283465385437 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4316623210906982, |
|
"epoch": 1.13, |
|
"learning_rate": 5.9930802236963245e-05, |
|
"loss": 1.5247, |
|
"step": 3130, |
|
"task_loss": 0.8690903186798096 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6122153997421265, |
|
"epoch": 1.13, |
|
"learning_rate": 5.992689510541028e-05, |
|
"loss": 1.42, |
|
"step": 3140, |
|
"task_loss": 1.2866846323013306 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.323101282119751, |
|
"epoch": 1.14, |
|
"learning_rate": 5.992288081194423e-05, |
|
"loss": 1.3262, |
|
"step": 3150, |
|
"task_loss": 1.4218419790267944 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2536742687225342, |
|
"epoch": 1.14, |
|
"learning_rate": 5.991875937093943e-05, |
|
"loss": 1.4379, |
|
"step": 3160, |
|
"task_loss": 1.2389776706695557 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0673775672912598, |
|
"epoch": 1.15, |
|
"learning_rate": 5.991453079715389e-05, |
|
"loss": 1.3706, |
|
"step": 3170, |
|
"task_loss": 0.973718523979187 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4817243814468384, |
|
"epoch": 1.15, |
|
"learning_rate": 5.991019510572925e-05, |
|
"loss": 1.3935, |
|
"step": 3180, |
|
"task_loss": 0.9717172384262085 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.444387435913086, |
|
"epoch": 1.15, |
|
"learning_rate": 5.990575231219071e-05, |
|
"loss": 1.3291, |
|
"step": 3190, |
|
"task_loss": 1.6736479997634888 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3083770275115967, |
|
"epoch": 1.16, |
|
"learning_rate": 5.9901202432446966e-05, |
|
"loss": 1.1301, |
|
"step": 3200, |
|
"task_loss": 1.0902491807937622 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2770934104919434, |
|
"epoch": 1.16, |
|
"learning_rate": 5.989654548279019e-05, |
|
"loss": 1.3383, |
|
"step": 3210, |
|
"task_loss": 0.8913994431495667 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9612149000167847, |
|
"epoch": 1.16, |
|
"learning_rate": 5.989178147989594e-05, |
|
"loss": 1.2876, |
|
"step": 3220, |
|
"task_loss": 0.866273045539856 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.750402569770813, |
|
"epoch": 1.17, |
|
"learning_rate": 5.988691044082309e-05, |
|
"loss": 1.3676, |
|
"step": 3230, |
|
"task_loss": 1.519735336303711 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3474881649017334, |
|
"epoch": 1.17, |
|
"learning_rate": 5.988193238301383e-05, |
|
"loss": 1.1273, |
|
"step": 3240, |
|
"task_loss": 0.7416272163391113 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.463036060333252, |
|
"epoch": 1.17, |
|
"learning_rate": 5.987684732429352e-05, |
|
"loss": 1.3382, |
|
"step": 3250, |
|
"task_loss": 1.3737218379974365 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_exact_match": 77.60643330179754, |
|
"eval_f1": 86.28265990982867, |
|
"step": 3250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9102663397789001, |
|
"epoch": 1.18, |
|
"learning_rate": 5.987165528287069e-05, |
|
"loss": 1.3387, |
|
"step": 3260, |
|
"task_loss": 0.9284595251083374 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6090664863586426, |
|
"epoch": 1.18, |
|
"learning_rate": 5.9866356277336964e-05, |
|
"loss": 1.293, |
|
"step": 3270, |
|
"task_loss": 1.4329043626785278 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5263841152191162, |
|
"epoch": 1.19, |
|
"learning_rate": 5.9860950326666935e-05, |
|
"loss": 1.4872, |
|
"step": 3280, |
|
"task_loss": 1.0852687358856201 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2740449905395508, |
|
"epoch": 1.19, |
|
"learning_rate": 5.985543745021821e-05, |
|
"loss": 1.3863, |
|
"step": 3290, |
|
"task_loss": 1.306997537612915 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2338653802871704, |
|
"epoch": 1.19, |
|
"learning_rate": 5.98498176677312e-05, |
|
"loss": 1.2642, |
|
"step": 3300, |
|
"task_loss": 0.6805293560028076 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0945351123809814, |
|
"epoch": 1.2, |
|
"learning_rate": 5.98440909993292e-05, |
|
"loss": 1.2649, |
|
"step": 3310, |
|
"task_loss": 0.9938749074935913 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.241112232208252, |
|
"epoch": 1.2, |
|
"learning_rate": 5.983825746551817e-05, |
|
"loss": 1.4417, |
|
"step": 3320, |
|
"task_loss": 2.230130910873413 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2390515804290771, |
|
"epoch": 1.2, |
|
"learning_rate": 5.9832317087186795e-05, |
|
"loss": 1.368, |
|
"step": 3330, |
|
"task_loss": 0.8296461701393127 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5527942180633545, |
|
"epoch": 1.21, |
|
"learning_rate": 5.982626988560631e-05, |
|
"loss": 1.4451, |
|
"step": 3340, |
|
"task_loss": 1.1470730304718018 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8781901001930237, |
|
"epoch": 1.21, |
|
"learning_rate": 5.9820115882430476e-05, |
|
"loss": 1.3569, |
|
"step": 3350, |
|
"task_loss": 0.6853946447372437 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.358176350593567, |
|
"epoch": 1.21, |
|
"learning_rate": 5.981385509969547e-05, |
|
"loss": 1.2828, |
|
"step": 3360, |
|
"task_loss": 1.4298112392425537 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2665464878082275, |
|
"epoch": 1.22, |
|
"learning_rate": 5.980748755981984e-05, |
|
"loss": 1.3211, |
|
"step": 3370, |
|
"task_loss": 1.4579181671142578 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.965965211391449, |
|
"epoch": 1.22, |
|
"learning_rate": 5.980101328560442e-05, |
|
"loss": 1.3321, |
|
"step": 3380, |
|
"task_loss": 0.7342075109481812 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5226565599441528, |
|
"epoch": 1.23, |
|
"learning_rate": 5.979443230023221e-05, |
|
"loss": 1.4229, |
|
"step": 3390, |
|
"task_loss": 1.4543688297271729 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.332014799118042, |
|
"epoch": 1.23, |
|
"learning_rate": 5.978774462726834e-05, |
|
"loss": 1.3364, |
|
"step": 3400, |
|
"task_loss": 0.9676311612129211 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0038352012634277, |
|
"epoch": 1.23, |
|
"learning_rate": 5.9780950290659965e-05, |
|
"loss": 1.2919, |
|
"step": 3410, |
|
"task_loss": 0.9453527331352234 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.033329725265503, |
|
"epoch": 1.24, |
|
"learning_rate": 5.977404931473615e-05, |
|
"loss": 1.2794, |
|
"step": 3420, |
|
"task_loss": 0.5825457572937012 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0678160190582275, |
|
"epoch": 1.24, |
|
"learning_rate": 5.976704172420787e-05, |
|
"loss": 1.2897, |
|
"step": 3430, |
|
"task_loss": 1.085493803024292 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.299585223197937, |
|
"epoch": 1.24, |
|
"learning_rate": 5.975992754416782e-05, |
|
"loss": 1.2425, |
|
"step": 3440, |
|
"task_loss": 0.9612342119216919 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2164323329925537, |
|
"epoch": 1.25, |
|
"learning_rate": 5.975270680009036e-05, |
|
"loss": 1.3221, |
|
"step": 3450, |
|
"task_loss": 1.375262975692749 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5335781574249268, |
|
"epoch": 1.25, |
|
"learning_rate": 5.974537951783148e-05, |
|
"loss": 1.4045, |
|
"step": 3460, |
|
"task_loss": 1.7295887470245361 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2506041526794434, |
|
"epoch": 1.25, |
|
"learning_rate": 5.9737945723628635e-05, |
|
"loss": 1.2224, |
|
"step": 3470, |
|
"task_loss": 1.2525482177734375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.2599070072174072, |
|
"epoch": 1.26, |
|
"learning_rate": 5.973040544410066e-05, |
|
"loss": 1.3498, |
|
"step": 3480, |
|
"task_loss": 1.7536072731018066 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.718400239944458, |
|
"epoch": 1.26, |
|
"learning_rate": 5.972275870624773e-05, |
|
"loss": 1.2841, |
|
"step": 3490, |
|
"task_loss": 1.339259147644043 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5253331661224365, |
|
"epoch": 1.26, |
|
"learning_rate": 5.971500553745119e-05, |
|
"loss": 1.4498, |
|
"step": 3500, |
|
"task_loss": 0.9578819274902344 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_exact_match": 78.38221381267739, |
|
"eval_f1": 86.75896485683346, |
|
"step": 3500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6366232633590698, |
|
"epoch": 1.27, |
|
"learning_rate": 5.9707145965473516e-05, |
|
"loss": 1.4598, |
|
"step": 3510, |
|
"task_loss": 1.8735042810440063 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0762263536453247, |
|
"epoch": 1.27, |
|
"learning_rate": 5.969918001845817e-05, |
|
"loss": 1.2466, |
|
"step": 3520, |
|
"task_loss": 0.8542821407318115 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3200879096984863, |
|
"epoch": 1.28, |
|
"learning_rate": 5.969110772492954e-05, |
|
"loss": 1.269, |
|
"step": 3530, |
|
"task_loss": 1.0112990140914917 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1145355701446533, |
|
"epoch": 1.28, |
|
"learning_rate": 5.968292911379281e-05, |
|
"loss": 1.4295, |
|
"step": 3540, |
|
"task_loss": 0.8630874752998352 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.371903657913208, |
|
"epoch": 1.28, |
|
"learning_rate": 5.967464421433385e-05, |
|
"loss": 1.3867, |
|
"step": 3550, |
|
"task_loss": 1.1606991291046143 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.486452341079712, |
|
"epoch": 1.29, |
|
"learning_rate": 5.966625305621916e-05, |
|
"loss": 1.2152, |
|
"step": 3560, |
|
"task_loss": 1.3023685216903687 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.478879690170288, |
|
"epoch": 1.29, |
|
"learning_rate": 5.965775566949571e-05, |
|
"loss": 1.238, |
|
"step": 3570, |
|
"task_loss": 1.165961503982544 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.483465552330017, |
|
"epoch": 1.29, |
|
"learning_rate": 5.964915208459085e-05, |
|
"loss": 1.3574, |
|
"step": 3580, |
|
"task_loss": 1.0961742401123047 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.294953465461731, |
|
"epoch": 1.3, |
|
"learning_rate": 5.9640442332312195e-05, |
|
"loss": 1.2928, |
|
"step": 3590, |
|
"task_loss": 0.8126479387283325 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0170869827270508, |
|
"epoch": 1.3, |
|
"learning_rate": 5.963162644384755e-05, |
|
"loss": 1.1543, |
|
"step": 3600, |
|
"task_loss": 0.8312715291976929 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5762726068496704, |
|
"epoch": 1.3, |
|
"learning_rate": 5.9622704450764756e-05, |
|
"loss": 1.2988, |
|
"step": 3610, |
|
"task_loss": 0.9494091272354126 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0313997268676758, |
|
"epoch": 1.31, |
|
"learning_rate": 5.9613676385011585e-05, |
|
"loss": 1.1966, |
|
"step": 3620, |
|
"task_loss": 0.8011406660079956 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.594609260559082, |
|
"epoch": 1.31, |
|
"learning_rate": 5.960454227891564e-05, |
|
"loss": 1.4918, |
|
"step": 3630, |
|
"task_loss": 1.3429150581359863 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.721564531326294, |
|
"epoch": 1.32, |
|
"learning_rate": 5.9595302165184246e-05, |
|
"loss": 1.423, |
|
"step": 3640, |
|
"task_loss": 1.5580472946166992 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9121531248092651, |
|
"epoch": 1.32, |
|
"learning_rate": 5.9585956076904284e-05, |
|
"loss": 1.3897, |
|
"step": 3650, |
|
"task_loss": 1.109339952468872 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6109825372695923, |
|
"epoch": 1.32, |
|
"learning_rate": 5.9576504047542156e-05, |
|
"loss": 1.41, |
|
"step": 3660, |
|
"task_loss": 0.9962501525878906 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4419324398040771, |
|
"epoch": 1.33, |
|
"learning_rate": 5.956694611094356e-05, |
|
"loss": 1.2896, |
|
"step": 3670, |
|
"task_loss": 0.6805934906005859 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3609169721603394, |
|
"epoch": 1.33, |
|
"learning_rate": 5.955728230133347e-05, |
|
"loss": 1.2873, |
|
"step": 3680, |
|
"task_loss": 0.8458003997802734 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1782557964324951, |
|
"epoch": 1.33, |
|
"learning_rate": 5.954751265331597e-05, |
|
"loss": 1.3991, |
|
"step": 3690, |
|
"task_loss": 1.114344835281372 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5857070684432983, |
|
"epoch": 1.34, |
|
"learning_rate": 5.9537637201874086e-05, |
|
"loss": 1.2406, |
|
"step": 3700, |
|
"task_loss": 1.243434190750122 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4138120412826538, |
|
"epoch": 1.34, |
|
"learning_rate": 5.952765598236975e-05, |
|
"loss": 1.3789, |
|
"step": 3710, |
|
"task_loss": 0.6327986717224121 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.529003620147705, |
|
"epoch": 1.34, |
|
"learning_rate": 5.95175690305436e-05, |
|
"loss": 1.3643, |
|
"step": 3720, |
|
"task_loss": 1.2512716054916382 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.472851037979126, |
|
"epoch": 1.35, |
|
"learning_rate": 5.950737638251488e-05, |
|
"loss": 1.311, |
|
"step": 3730, |
|
"task_loss": 1.6475411653518677 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.9043711423873901, |
|
"epoch": 1.35, |
|
"learning_rate": 5.9497078074781344e-05, |
|
"loss": 1.3004, |
|
"step": 3740, |
|
"task_loss": 1.6194053888320923 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0727344751358032, |
|
"epoch": 1.36, |
|
"learning_rate": 5.948667414421904e-05, |
|
"loss": 1.2777, |
|
"step": 3750, |
|
"task_loss": 1.061992883682251 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_exact_match": 78.76064333017976, |
|
"eval_f1": 87.07184243155801, |
|
"step": 3750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.449849247932434, |
|
"epoch": 1.36, |
|
"learning_rate": 5.947616462808226e-05, |
|
"loss": 1.4524, |
|
"step": 3760, |
|
"task_loss": 0.9752452373504639 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.321598768234253, |
|
"epoch": 1.36, |
|
"learning_rate": 5.946554956400337e-05, |
|
"loss": 1.3379, |
|
"step": 3770, |
|
"task_loss": 1.1972038745880127 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2405821084976196, |
|
"epoch": 1.37, |
|
"learning_rate": 5.945482898999269e-05, |
|
"loss": 1.3715, |
|
"step": 3780, |
|
"task_loss": 1.1690441370010376 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1473963260650635, |
|
"epoch": 1.37, |
|
"learning_rate": 5.9444002944438315e-05, |
|
"loss": 1.2458, |
|
"step": 3790, |
|
"task_loss": 0.898491382598877 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.310509443283081, |
|
"epoch": 1.37, |
|
"learning_rate": 5.943307146610606e-05, |
|
"loss": 1.353, |
|
"step": 3800, |
|
"task_loss": 1.1752980947494507 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2511919736862183, |
|
"epoch": 1.38, |
|
"learning_rate": 5.942203459413925e-05, |
|
"loss": 1.1887, |
|
"step": 3810, |
|
"task_loss": 1.156741738319397 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2756710052490234, |
|
"epoch": 1.38, |
|
"learning_rate": 5.941089236805858e-05, |
|
"loss": 1.2036, |
|
"step": 3820, |
|
"task_loss": 0.9336721301078796 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8774323463439941, |
|
"epoch": 1.38, |
|
"learning_rate": 5.9399644827762026e-05, |
|
"loss": 1.2673, |
|
"step": 3830, |
|
"task_loss": 1.782167673110962 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3417766094207764, |
|
"epoch": 1.39, |
|
"learning_rate": 5.938829201352467e-05, |
|
"loss": 1.2705, |
|
"step": 3840, |
|
"task_loss": 1.7044801712036133 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4353686571121216, |
|
"epoch": 1.39, |
|
"learning_rate": 5.937683396599854e-05, |
|
"loss": 1.1901, |
|
"step": 3850, |
|
"task_loss": 1.3189786672592163 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0781793594360352, |
|
"epoch": 1.4, |
|
"learning_rate": 5.9365270726212497e-05, |
|
"loss": 1.2764, |
|
"step": 3860, |
|
"task_loss": 0.7336215376853943 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6718968152999878, |
|
"epoch": 1.4, |
|
"learning_rate": 5.935360233557207e-05, |
|
"loss": 1.3173, |
|
"step": 3870, |
|
"task_loss": 1.3569270372390747 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3217222690582275, |
|
"epoch": 1.4, |
|
"learning_rate": 5.934182883585932e-05, |
|
"loss": 1.3197, |
|
"step": 3880, |
|
"task_loss": 1.3436063528060913 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8721194267272949, |
|
"epoch": 1.41, |
|
"learning_rate": 5.9329950269232654e-05, |
|
"loss": 1.2029, |
|
"step": 3890, |
|
"task_loss": 0.7747288942337036 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2571134567260742, |
|
"epoch": 1.41, |
|
"learning_rate": 5.9317966678226725e-05, |
|
"loss": 1.4305, |
|
"step": 3900, |
|
"task_loss": 1.112687110900879 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2334290742874146, |
|
"epoch": 1.41, |
|
"learning_rate": 5.930587810575225e-05, |
|
"loss": 1.3896, |
|
"step": 3910, |
|
"task_loss": 0.8130580186843872 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6533408164978027, |
|
"epoch": 1.42, |
|
"learning_rate": 5.929368459509586e-05, |
|
"loss": 1.1132, |
|
"step": 3920, |
|
"task_loss": 1.5629518032073975 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.402529001235962, |
|
"epoch": 1.42, |
|
"learning_rate": 5.9281386189919965e-05, |
|
"loss": 1.1492, |
|
"step": 3930, |
|
"task_loss": 1.2752315998077393 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1418523788452148, |
|
"epoch": 1.42, |
|
"learning_rate": 5.926898293426255e-05, |
|
"loss": 1.2519, |
|
"step": 3940, |
|
"task_loss": 1.0082168579101562 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.589359164237976, |
|
"epoch": 1.43, |
|
"learning_rate": 5.925647487253707e-05, |
|
"loss": 1.2353, |
|
"step": 3950, |
|
"task_loss": 1.2787137031555176 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0001301765441895, |
|
"epoch": 1.43, |
|
"learning_rate": 5.9243862049532264e-05, |
|
"loss": 1.3728, |
|
"step": 3960, |
|
"task_loss": 0.9775519967079163 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7581260204315186, |
|
"epoch": 1.43, |
|
"learning_rate": 5.9231144510411994e-05, |
|
"loss": 1.3053, |
|
"step": 3970, |
|
"task_loss": 1.2524868249893188 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1798157691955566, |
|
"epoch": 1.44, |
|
"learning_rate": 5.921832230071508e-05, |
|
"loss": 1.3237, |
|
"step": 3980, |
|
"task_loss": 1.0549383163452148 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2880291938781738, |
|
"epoch": 1.44, |
|
"learning_rate": 5.9205395466355186e-05, |
|
"loss": 1.2282, |
|
"step": 3990, |
|
"task_loss": 0.7487226128578186 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.314383625984192, |
|
"epoch": 1.45, |
|
"learning_rate": 5.9192364053620554e-05, |
|
"loss": 1.3069, |
|
"step": 4000, |
|
"task_loss": 1.3157868385314941 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_exact_match": 78.62819299905392, |
|
"eval_f1": 87.15414215858682, |
|
"step": 4000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.793086051940918, |
|
"epoch": 1.45, |
|
"learning_rate": 5.917922810917394e-05, |
|
"loss": 1.175, |
|
"step": 4010, |
|
"task_loss": 0.5701212286949158 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.295424461364746, |
|
"epoch": 1.45, |
|
"learning_rate": 5.91659876800524e-05, |
|
"loss": 1.2627, |
|
"step": 4020, |
|
"task_loss": 0.9144801497459412 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0756990909576416, |
|
"epoch": 1.46, |
|
"learning_rate": 5.9152642813667135e-05, |
|
"loss": 1.2696, |
|
"step": 4030, |
|
"task_loss": 0.8201438188552856 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3234689235687256, |
|
"epoch": 1.46, |
|
"learning_rate": 5.913919355780329e-05, |
|
"loss": 1.3347, |
|
"step": 4040, |
|
"task_loss": 1.1706106662750244 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0304203033447266, |
|
"epoch": 1.46, |
|
"learning_rate": 5.912563996061981e-05, |
|
"loss": 1.1509, |
|
"step": 4050, |
|
"task_loss": 1.0333361625671387 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8914575576782227, |
|
"epoch": 1.47, |
|
"learning_rate": 5.9111982070649294e-05, |
|
"loss": 1.2647, |
|
"step": 4060, |
|
"task_loss": 1.0233019590377808 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5193976163864136, |
|
"epoch": 1.47, |
|
"learning_rate": 5.909821993679775e-05, |
|
"loss": 1.336, |
|
"step": 4070, |
|
"task_loss": 1.1142679452896118 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.550922155380249, |
|
"epoch": 1.47, |
|
"learning_rate": 5.908435360834451e-05, |
|
"loss": 1.2458, |
|
"step": 4080, |
|
"task_loss": 1.194461464881897 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4292857646942139, |
|
"epoch": 1.48, |
|
"learning_rate": 5.9070383134941953e-05, |
|
"loss": 1.3929, |
|
"step": 4090, |
|
"task_loss": 1.3362990617752075 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2451667785644531, |
|
"epoch": 1.48, |
|
"learning_rate": 5.9056308566615434e-05, |
|
"loss": 1.1498, |
|
"step": 4100, |
|
"task_loss": 0.7560177445411682 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4586573839187622, |
|
"epoch": 1.49, |
|
"learning_rate": 5.904212995376298e-05, |
|
"loss": 1.1929, |
|
"step": 4110, |
|
"task_loss": 1.1912312507629395 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1983109712600708, |
|
"epoch": 1.49, |
|
"learning_rate": 5.9027847347155253e-05, |
|
"loss": 1.1934, |
|
"step": 4120, |
|
"task_loss": 1.048568844795227 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7856746912002563, |
|
"epoch": 1.49, |
|
"learning_rate": 5.901346079793525e-05, |
|
"loss": 1.25, |
|
"step": 4130, |
|
"task_loss": 1.6442539691925049 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.616405963897705, |
|
"epoch": 1.5, |
|
"learning_rate": 5.899897035761817e-05, |
|
"loss": 1.2341, |
|
"step": 4140, |
|
"task_loss": 1.356785774230957 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2377030849456787, |
|
"epoch": 1.5, |
|
"learning_rate": 5.898437607809124e-05, |
|
"loss": 1.1177, |
|
"step": 4150, |
|
"task_loss": 1.0740208625793457 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0152101516723633, |
|
"epoch": 1.5, |
|
"learning_rate": 5.896967801161349e-05, |
|
"loss": 1.3281, |
|
"step": 4160, |
|
"task_loss": 1.027454137802124 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5116093158721924, |
|
"epoch": 1.51, |
|
"learning_rate": 5.895487621081562e-05, |
|
"loss": 1.2888, |
|
"step": 4170, |
|
"task_loss": 1.2181144952774048 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1471505165100098, |
|
"epoch": 1.51, |
|
"learning_rate": 5.893997072869975e-05, |
|
"loss": 1.1994, |
|
"step": 4180, |
|
"task_loss": 0.8226631879806519 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0915307998657227, |
|
"epoch": 1.51, |
|
"learning_rate": 5.892496161863928e-05, |
|
"loss": 1.2187, |
|
"step": 4190, |
|
"task_loss": 0.5205323100090027 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2426114082336426, |
|
"epoch": 1.52, |
|
"learning_rate": 5.8909848934378674e-05, |
|
"loss": 1.1759, |
|
"step": 4200, |
|
"task_loss": 1.330606460571289 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.944818913936615, |
|
"epoch": 1.52, |
|
"learning_rate": 5.889463273003328e-05, |
|
"loss": 1.2345, |
|
"step": 4210, |
|
"task_loss": 0.6433451175689697 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1503543853759766, |
|
"epoch": 1.53, |
|
"learning_rate": 5.887931306008911e-05, |
|
"loss": 1.241, |
|
"step": 4220, |
|
"task_loss": 1.847876787185669 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0494859218597412, |
|
"epoch": 1.53, |
|
"learning_rate": 5.8863889979402696e-05, |
|
"loss": 1.2631, |
|
"step": 4230, |
|
"task_loss": 0.9382550716400146 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.224144458770752, |
|
"epoch": 1.53, |
|
"learning_rate": 5.8848363543200816e-05, |
|
"loss": 1.2842, |
|
"step": 4240, |
|
"task_loss": 1.0627460479736328 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.124106764793396, |
|
"epoch": 1.54, |
|
"learning_rate": 5.88327338070804e-05, |
|
"loss": 1.3314, |
|
"step": 4250, |
|
"task_loss": 1.1247596740722656 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_exact_match": 78.56196783349101, |
|
"eval_f1": 87.1287016330811, |
|
"step": 4250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6598260402679443, |
|
"epoch": 1.54, |
|
"learning_rate": 5.8817000827008224e-05, |
|
"loss": 1.1831, |
|
"step": 4260, |
|
"task_loss": 1.4295686483383179 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6338021755218506, |
|
"epoch": 1.54, |
|
"learning_rate": 5.88011646593208e-05, |
|
"loss": 1.3631, |
|
"step": 4270, |
|
"task_loss": 1.3802179098129272 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0921931266784668, |
|
"epoch": 1.55, |
|
"learning_rate": 5.878522536072409e-05, |
|
"loss": 1.0914, |
|
"step": 4280, |
|
"task_loss": 1.2787001132965088 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1799564361572266, |
|
"epoch": 1.55, |
|
"learning_rate": 5.876918298829337e-05, |
|
"loss": 1.1864, |
|
"step": 4290, |
|
"task_loss": 1.129196047782898 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8012198209762573, |
|
"epoch": 1.55, |
|
"learning_rate": 5.875303759947301e-05, |
|
"loss": 1.1393, |
|
"step": 4300, |
|
"task_loss": 1.3329541683197021 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3091764450073242, |
|
"epoch": 1.56, |
|
"learning_rate": 5.873678925207624e-05, |
|
"loss": 1.3255, |
|
"step": 4310, |
|
"task_loss": 1.0994157791137695 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.598827600479126, |
|
"epoch": 1.56, |
|
"learning_rate": 5.872043800428498e-05, |
|
"loss": 1.1574, |
|
"step": 4320, |
|
"task_loss": 1.1182217597961426 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.304560899734497, |
|
"epoch": 1.56, |
|
"learning_rate": 5.870398391464961e-05, |
|
"loss": 1.153, |
|
"step": 4330, |
|
"task_loss": 1.2183809280395508 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5170609951019287, |
|
"epoch": 1.57, |
|
"learning_rate": 5.868742704208875e-05, |
|
"loss": 1.3349, |
|
"step": 4340, |
|
"task_loss": 1.345301628112793 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2407028675079346, |
|
"epoch": 1.57, |
|
"learning_rate": 5.867076744588908e-05, |
|
"loss": 1.2039, |
|
"step": 4350, |
|
"task_loss": 1.1238722801208496 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2521941661834717, |
|
"epoch": 1.58, |
|
"learning_rate": 5.8654005185705114e-05, |
|
"loss": 1.1578, |
|
"step": 4360, |
|
"task_loss": 0.9843517541885376 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4626522064208984, |
|
"epoch": 1.58, |
|
"learning_rate": 5.863714032155897e-05, |
|
"loss": 1.2691, |
|
"step": 4370, |
|
"task_loss": 0.9148292541503906 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6399775743484497, |
|
"epoch": 1.58, |
|
"learning_rate": 5.8620172913840186e-05, |
|
"loss": 1.2639, |
|
"step": 4380, |
|
"task_loss": 1.2807812690734863 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.63152277469635, |
|
"epoch": 1.59, |
|
"learning_rate": 5.860310302330548e-05, |
|
"loss": 1.3362, |
|
"step": 4390, |
|
"task_loss": 1.241642713546753 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8616843223571777, |
|
"epoch": 1.59, |
|
"learning_rate": 5.8585930711078514e-05, |
|
"loss": 1.2129, |
|
"step": 4400, |
|
"task_loss": 1.7467364072799683 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.506812334060669, |
|
"epoch": 1.59, |
|
"learning_rate": 5.856865603864975e-05, |
|
"loss": 1.4104, |
|
"step": 4410, |
|
"task_loss": 1.2961748838424683 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5448143482208252, |
|
"epoch": 1.6, |
|
"learning_rate": 5.855127906787615e-05, |
|
"loss": 1.2444, |
|
"step": 4420, |
|
"task_loss": 0.8679540157318115 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8945345878601074, |
|
"epoch": 1.6, |
|
"learning_rate": 5.853379986098098e-05, |
|
"loss": 1.2738, |
|
"step": 4430, |
|
"task_loss": 1.0072073936462402 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8297909498214722, |
|
"epoch": 1.6, |
|
"learning_rate": 5.85162184805536e-05, |
|
"loss": 1.3369, |
|
"step": 4440, |
|
"task_loss": 0.6960026025772095 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.204261064529419, |
|
"epoch": 1.61, |
|
"learning_rate": 5.849853498954926e-05, |
|
"loss": 1.2531, |
|
"step": 4450, |
|
"task_loss": 0.7097033262252808 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0963704586029053, |
|
"epoch": 1.61, |
|
"learning_rate": 5.848074945128877e-05, |
|
"loss": 1.1261, |
|
"step": 4460, |
|
"task_loss": 1.0379323959350586 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0468240976333618, |
|
"epoch": 1.62, |
|
"learning_rate": 5.846286192945845e-05, |
|
"loss": 1.1079, |
|
"step": 4470, |
|
"task_loss": 0.7108166813850403 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8962523341178894, |
|
"epoch": 1.62, |
|
"learning_rate": 5.844487248810972e-05, |
|
"loss": 1.1465, |
|
"step": 4480, |
|
"task_loss": 0.902863621711731 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2622666358947754, |
|
"epoch": 1.62, |
|
"learning_rate": 5.8426781191659e-05, |
|
"loss": 1.1282, |
|
"step": 4490, |
|
"task_loss": 1.0960261821746826 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3841495513916016, |
|
"epoch": 1.63, |
|
"learning_rate": 5.840858810488741e-05, |
|
"loss": 1.1508, |
|
"step": 4500, |
|
"task_loss": 1.3540650606155396 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_exact_match": 79.4228949858089, |
|
"eval_f1": 87.49554502449215, |
|
"step": 4500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1759483814239502, |
|
"epoch": 1.63, |
|
"learning_rate": 5.8390293292940554e-05, |
|
"loss": 1.2905, |
|
"step": 4510, |
|
"task_loss": 1.0437816381454468 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.183558702468872, |
|
"epoch": 1.63, |
|
"learning_rate": 5.837189682132831e-05, |
|
"loss": 1.1933, |
|
"step": 4520, |
|
"task_loss": 1.1283735036849976 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1170258522033691, |
|
"epoch": 1.64, |
|
"learning_rate": 5.8353398755924576e-05, |
|
"loss": 1.2729, |
|
"step": 4530, |
|
"task_loss": 1.0031014680862427 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4635521173477173, |
|
"epoch": 1.64, |
|
"learning_rate": 5.833479916296704e-05, |
|
"loss": 1.2865, |
|
"step": 4540, |
|
"task_loss": 1.0605723857879639 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9159049391746521, |
|
"epoch": 1.64, |
|
"learning_rate": 5.8316098109056905e-05, |
|
"loss": 1.119, |
|
"step": 4550, |
|
"task_loss": 0.7583830952644348 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0998435020446777, |
|
"epoch": 1.65, |
|
"learning_rate": 5.829729566115874e-05, |
|
"loss": 1.0501, |
|
"step": 4560, |
|
"task_loss": 0.7868894338607788 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 2.326512575149536, |
|
"epoch": 1.65, |
|
"learning_rate": 5.827839188660012e-05, |
|
"loss": 1.5112, |
|
"step": 4570, |
|
"task_loss": 2.1364526748657227 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4276812076568604, |
|
"epoch": 1.66, |
|
"learning_rate": 5.825938685307151e-05, |
|
"loss": 1.185, |
|
"step": 4580, |
|
"task_loss": 1.419746994972229 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2273736000061035, |
|
"epoch": 1.66, |
|
"learning_rate": 5.824028062862592e-05, |
|
"loss": 1.0967, |
|
"step": 4590, |
|
"task_loss": 0.9421610832214355 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2494888305664062, |
|
"epoch": 1.66, |
|
"learning_rate": 5.822107328167873e-05, |
|
"loss": 1.2568, |
|
"step": 4600, |
|
"task_loss": 1.162670612335205 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.387427568435669, |
|
"epoch": 1.67, |
|
"learning_rate": 5.8201764881007395e-05, |
|
"loss": 1.2342, |
|
"step": 4610, |
|
"task_loss": 1.1487003564834595 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7944004535675049, |
|
"epoch": 1.67, |
|
"learning_rate": 5.8182355495751244e-05, |
|
"loss": 1.0527, |
|
"step": 4620, |
|
"task_loss": 0.8616865873336792 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0871223211288452, |
|
"epoch": 1.67, |
|
"learning_rate": 5.8162845195411193e-05, |
|
"loss": 1.0867, |
|
"step": 4630, |
|
"task_loss": 1.0131196975708008 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1689753532409668, |
|
"epoch": 1.68, |
|
"learning_rate": 5.814323404984954e-05, |
|
"loss": 1.239, |
|
"step": 4640, |
|
"task_loss": 0.9322003126144409 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7448208928108215, |
|
"epoch": 1.68, |
|
"learning_rate": 5.8123522129289646e-05, |
|
"loss": 1.0301, |
|
"step": 4650, |
|
"task_loss": 0.4830577075481415 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9654221534729004, |
|
"epoch": 1.68, |
|
"learning_rate": 5.8103709504315755e-05, |
|
"loss": 1.1417, |
|
"step": 4660, |
|
"task_loss": 1.0902562141418457 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.245806097984314, |
|
"epoch": 1.69, |
|
"learning_rate": 5.808379624587272e-05, |
|
"loss": 1.1587, |
|
"step": 4670, |
|
"task_loss": 1.5773122310638428 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8751307129859924, |
|
"epoch": 1.69, |
|
"learning_rate": 5.806378242526572e-05, |
|
"loss": 1.2707, |
|
"step": 4680, |
|
"task_loss": 0.7810333967208862 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.968869686126709, |
|
"epoch": 1.69, |
|
"learning_rate": 5.804366811416004e-05, |
|
"loss": 1.0442, |
|
"step": 4690, |
|
"task_loss": 0.7142177224159241 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0504038333892822, |
|
"epoch": 1.7, |
|
"learning_rate": 5.8023453384580784e-05, |
|
"loss": 1.2519, |
|
"step": 4700, |
|
"task_loss": 0.9022510051727295 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1235523223876953, |
|
"epoch": 1.7, |
|
"learning_rate": 5.800313830891265e-05, |
|
"loss": 1.3013, |
|
"step": 4710, |
|
"task_loss": 0.8178448677062988 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7393571138381958, |
|
"epoch": 1.71, |
|
"learning_rate": 5.798272295989965e-05, |
|
"loss": 1.28, |
|
"step": 4720, |
|
"task_loss": 0.8073184490203857 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.7382944822311401, |
|
"epoch": 1.71, |
|
"learning_rate": 5.796220741064486e-05, |
|
"loss": 1.4239, |
|
"step": 4730, |
|
"task_loss": 1.6766483783721924 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2721589803695679, |
|
"epoch": 1.71, |
|
"learning_rate": 5.794159173461013e-05, |
|
"loss": 1.0153, |
|
"step": 4740, |
|
"task_loss": 1.1552908420562744 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.029242753982544, |
|
"epoch": 1.72, |
|
"learning_rate": 5.7920876005615866e-05, |
|
"loss": 1.2561, |
|
"step": 4750, |
|
"task_loss": 1.1739318370819092 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_exact_match": 79.47019867549669, |
|
"eval_f1": 87.64686274053376, |
|
"step": 4750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.001348614692688, |
|
"epoch": 1.72, |
|
"learning_rate": 5.790006029784072e-05, |
|
"loss": 1.0992, |
|
"step": 4760, |
|
"task_loss": 0.8562111258506775 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5794899463653564, |
|
"epoch": 1.72, |
|
"learning_rate": 5.787914468582138e-05, |
|
"loss": 1.1878, |
|
"step": 4770, |
|
"task_loss": 1.2599658966064453 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5452587604522705, |
|
"epoch": 1.73, |
|
"learning_rate": 5.7858129244452245e-05, |
|
"loss": 1.1883, |
|
"step": 4780, |
|
"task_loss": 1.063377857208252 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1784594058990479, |
|
"epoch": 1.73, |
|
"learning_rate": 5.783701404898518e-05, |
|
"loss": 1.1611, |
|
"step": 4790, |
|
"task_loss": 0.8253315091133118 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.212900996208191, |
|
"epoch": 1.73, |
|
"learning_rate": 5.781579917502926e-05, |
|
"loss": 1.2314, |
|
"step": 4800, |
|
"task_loss": 0.9298550486564636 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.448960542678833, |
|
"epoch": 1.74, |
|
"learning_rate": 5.7794484698550484e-05, |
|
"loss": 1.1093, |
|
"step": 4810, |
|
"task_loss": 1.1630914211273193 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6462247371673584, |
|
"epoch": 1.74, |
|
"learning_rate": 5.777307069587152e-05, |
|
"loss": 1.1482, |
|
"step": 4820, |
|
"task_loss": 1.3052637577056885 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3470487594604492, |
|
"epoch": 1.75, |
|
"learning_rate": 5.775155724367138e-05, |
|
"loss": 1.0766, |
|
"step": 4830, |
|
"task_loss": 1.4758813381195068 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2199983596801758, |
|
"epoch": 1.75, |
|
"learning_rate": 5.7729944418985225e-05, |
|
"loss": 1.1171, |
|
"step": 4840, |
|
"task_loss": 0.9905753135681152 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9857596755027771, |
|
"epoch": 1.75, |
|
"learning_rate": 5.770823229920403e-05, |
|
"loss": 1.1234, |
|
"step": 4850, |
|
"task_loss": 1.0089142322540283 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3423244953155518, |
|
"epoch": 1.76, |
|
"learning_rate": 5.7686420962074325e-05, |
|
"loss": 1.0579, |
|
"step": 4860, |
|
"task_loss": 0.8072549104690552 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5363335609436035, |
|
"epoch": 1.76, |
|
"learning_rate": 5.766451048569792e-05, |
|
"loss": 1.2695, |
|
"step": 4870, |
|
"task_loss": 1.405518889427185 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8228256702423096, |
|
"epoch": 1.76, |
|
"learning_rate": 5.7642500948531614e-05, |
|
"loss": 1.1095, |
|
"step": 4880, |
|
"task_loss": 0.8263888955116272 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6186543703079224, |
|
"epoch": 1.77, |
|
"learning_rate": 5.762039242938693e-05, |
|
"loss": 1.2104, |
|
"step": 4890, |
|
"task_loss": 1.1083335876464844 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.022383451461792, |
|
"epoch": 1.77, |
|
"learning_rate": 5.759818500742981e-05, |
|
"loss": 1.139, |
|
"step": 4900, |
|
"task_loss": 0.8983309268951416 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3909627199172974, |
|
"epoch": 1.77, |
|
"learning_rate": 5.757587876218039e-05, |
|
"loss": 1.1174, |
|
"step": 4910, |
|
"task_loss": 1.272632360458374 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5401933193206787, |
|
"epoch": 1.78, |
|
"learning_rate": 5.755347377351262e-05, |
|
"loss": 1.1882, |
|
"step": 4920, |
|
"task_loss": 1.1316332817077637 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4937539100646973, |
|
"epoch": 1.78, |
|
"learning_rate": 5.753097012165404e-05, |
|
"loss": 1.1599, |
|
"step": 4930, |
|
"task_loss": 1.5117436647415161 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2138440608978271, |
|
"epoch": 1.79, |
|
"learning_rate": 5.750836788718551e-05, |
|
"loss": 1.0335, |
|
"step": 4940, |
|
"task_loss": 1.3014496564865112 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9365776777267456, |
|
"epoch": 1.79, |
|
"learning_rate": 5.748566715104086e-05, |
|
"loss": 1.1542, |
|
"step": 4950, |
|
"task_loss": 0.8453970551490784 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6637686491012573, |
|
"epoch": 1.79, |
|
"learning_rate": 5.746286799450667e-05, |
|
"loss": 1.14, |
|
"step": 4960, |
|
"task_loss": 1.3181530237197876 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5403568744659424, |
|
"epoch": 1.8, |
|
"learning_rate": 5.743997049922189e-05, |
|
"loss": 1.3815, |
|
"step": 4970, |
|
"task_loss": 1.4503953456878662 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2465837001800537, |
|
"epoch": 1.8, |
|
"learning_rate": 5.741697474717765e-05, |
|
"loss": 1.0925, |
|
"step": 4980, |
|
"task_loss": 0.894507646560669 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8164676427841187, |
|
"epoch": 1.8, |
|
"learning_rate": 5.73938808207169e-05, |
|
"loss": 1.178, |
|
"step": 4990, |
|
"task_loss": 0.6441599130630493 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4209399223327637, |
|
"epoch": 1.81, |
|
"learning_rate": 5.737068880253413e-05, |
|
"loss": 1.1209, |
|
"step": 5000, |
|
"task_loss": 1.1921095848083496 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 79.59318826868495, |
|
"eval_f1": 87.62692030696158, |
|
"step": 5000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5741052627563477, |
|
"epoch": 1.81, |
|
"learning_rate": 5.7347398775675064e-05, |
|
"loss": 1.1256, |
|
"step": 5010, |
|
"task_loss": 0.9303328990936279 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1673991680145264, |
|
"epoch": 1.81, |
|
"learning_rate": 5.7324010823536405e-05, |
|
"loss": 1.2911, |
|
"step": 5020, |
|
"task_loss": 1.5069319009780884 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5451232194900513, |
|
"epoch": 1.82, |
|
"learning_rate": 5.730052502986547e-05, |
|
"loss": 1.2214, |
|
"step": 5030, |
|
"task_loss": 1.338415503501892 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8016589879989624, |
|
"epoch": 1.82, |
|
"learning_rate": 5.727694147875996e-05, |
|
"loss": 1.2434, |
|
"step": 5040, |
|
"task_loss": 0.6144353151321411 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4259705543518066, |
|
"epoch": 1.83, |
|
"learning_rate": 5.725326025466759e-05, |
|
"loss": 1.231, |
|
"step": 5050, |
|
"task_loss": 1.2222867012023926 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8470112085342407, |
|
"epoch": 1.83, |
|
"learning_rate": 5.722948144238586e-05, |
|
"loss": 1.0969, |
|
"step": 5060, |
|
"task_loss": 1.0450043678283691 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1960935592651367, |
|
"epoch": 1.83, |
|
"learning_rate": 5.7205605127061685e-05, |
|
"loss": 1.2254, |
|
"step": 5070, |
|
"task_loss": 0.9947346448898315 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4478334188461304, |
|
"epoch": 1.84, |
|
"learning_rate": 5.718163139419111e-05, |
|
"loss": 1.2485, |
|
"step": 5080, |
|
"task_loss": 1.0019992589950562 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9755243062973022, |
|
"epoch": 1.84, |
|
"learning_rate": 5.7157560329619036e-05, |
|
"loss": 1.0562, |
|
"step": 5090, |
|
"task_loss": 1.1016393899917603 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9732208251953125, |
|
"epoch": 1.84, |
|
"learning_rate": 5.7133392019538904e-05, |
|
"loss": 1.0489, |
|
"step": 5100, |
|
"task_loss": 1.0136487483978271 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.704603910446167, |
|
"epoch": 1.85, |
|
"learning_rate": 5.7109126550492306e-05, |
|
"loss": 1.3115, |
|
"step": 5110, |
|
"task_loss": 1.5213062763214111 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.960157573223114, |
|
"epoch": 1.85, |
|
"learning_rate": 5.70847640093688e-05, |
|
"loss": 0.987, |
|
"step": 5120, |
|
"task_loss": 0.6706898212432861 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2005268335342407, |
|
"epoch": 1.85, |
|
"learning_rate": 5.706030448340552e-05, |
|
"loss": 1.1784, |
|
"step": 5130, |
|
"task_loss": 1.5834624767303467 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.155214548110962, |
|
"epoch": 1.86, |
|
"learning_rate": 5.7035748060186886e-05, |
|
"loss": 1.2087, |
|
"step": 5140, |
|
"task_loss": 1.0313799381256104 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6764129996299744, |
|
"epoch": 1.86, |
|
"learning_rate": 5.701109482764426e-05, |
|
"loss": 1.2403, |
|
"step": 5150, |
|
"task_loss": 1.207790493965149 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2213506698608398, |
|
"epoch": 1.86, |
|
"learning_rate": 5.69863448740557e-05, |
|
"loss": 1.1657, |
|
"step": 5160, |
|
"task_loss": 0.9657225608825684 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.703071117401123, |
|
"epoch": 1.87, |
|
"learning_rate": 5.6961498288045576e-05, |
|
"loss": 1.2662, |
|
"step": 5170, |
|
"task_loss": 1.522236704826355 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8341003656387329, |
|
"epoch": 1.87, |
|
"learning_rate": 5.6936555158584276e-05, |
|
"loss": 1.121, |
|
"step": 5180, |
|
"task_loss": 1.0218309164047241 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0469675064086914, |
|
"epoch": 1.88, |
|
"learning_rate": 5.6911515574987906e-05, |
|
"loss": 1.2055, |
|
"step": 5190, |
|
"task_loss": 0.9404164552688599 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4735569953918457, |
|
"epoch": 1.88, |
|
"learning_rate": 5.688637962691794e-05, |
|
"loss": 1.0771, |
|
"step": 5200, |
|
"task_loss": 1.57379150390625 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.191293478012085, |
|
"epoch": 1.88, |
|
"learning_rate": 5.6861147404380914e-05, |
|
"loss": 1.0159, |
|
"step": 5210, |
|
"task_loss": 0.8541813492774963 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.772650957107544, |
|
"epoch": 1.89, |
|
"learning_rate": 5.6835818997728116e-05, |
|
"loss": 1.1434, |
|
"step": 5220, |
|
"task_loss": 0.8448182344436646 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.254989743232727, |
|
"epoch": 1.89, |
|
"learning_rate": 5.6810394497655246e-05, |
|
"loss": 1.0368, |
|
"step": 5230, |
|
"task_loss": 1.1865497827529907 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.007494568824768, |
|
"epoch": 1.89, |
|
"learning_rate": 5.678487399520206e-05, |
|
"loss": 1.0729, |
|
"step": 5240, |
|
"task_loss": 0.6971535086631775 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3780134916305542, |
|
"epoch": 1.9, |
|
"learning_rate": 5.6759257581752135e-05, |
|
"loss": 1.1506, |
|
"step": 5250, |
|
"task_loss": 0.8896455764770508 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_exact_match": 80.1608325449385, |
|
"eval_f1": 87.85411242609273, |
|
"step": 5250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5140453577041626, |
|
"epoch": 1.9, |
|
"learning_rate": 5.673354534903244e-05, |
|
"loss": 1.2356, |
|
"step": 5260, |
|
"task_loss": 1.2429227828979492 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8260661959648132, |
|
"epoch": 1.9, |
|
"learning_rate": 5.670773738911308e-05, |
|
"loss": 1.1357, |
|
"step": 5270, |
|
"task_loss": 0.6863052845001221 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.963780164718628, |
|
"epoch": 1.91, |
|
"learning_rate": 5.668183379440692e-05, |
|
"loss": 1.334, |
|
"step": 5280, |
|
"task_loss": 1.6351208686828613 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0707981586456299, |
|
"epoch": 1.91, |
|
"learning_rate": 5.665583465766929e-05, |
|
"loss": 1.1792, |
|
"step": 5290, |
|
"task_loss": 1.136970043182373 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.8824594020843506, |
|
"epoch": 1.92, |
|
"learning_rate": 5.662974007199761e-05, |
|
"loss": 1.1516, |
|
"step": 5300, |
|
"task_loss": 1.401637077331543 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5174405574798584, |
|
"epoch": 1.92, |
|
"learning_rate": 5.660355013083112e-05, |
|
"loss": 1.2431, |
|
"step": 5310, |
|
"task_loss": 1.5356242656707764 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1089816093444824, |
|
"epoch": 1.92, |
|
"learning_rate": 5.657726492795047e-05, |
|
"loss": 1.2988, |
|
"step": 5320, |
|
"task_loss": 0.8448818922042847 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6607980728149414, |
|
"epoch": 1.93, |
|
"learning_rate": 5.655088455747745e-05, |
|
"loss": 1.1833, |
|
"step": 5330, |
|
"task_loss": 1.336568832397461 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1899293661117554, |
|
"epoch": 1.93, |
|
"learning_rate": 5.6524409113874617e-05, |
|
"loss": 1.0574, |
|
"step": 5340, |
|
"task_loss": 0.9695686101913452 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5410118103027344, |
|
"epoch": 1.93, |
|
"learning_rate": 5.649783869194495e-05, |
|
"loss": 1.176, |
|
"step": 5350, |
|
"task_loss": 1.28522527217865 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2499977350234985, |
|
"epoch": 1.94, |
|
"learning_rate": 5.647117338683155e-05, |
|
"loss": 1.1411, |
|
"step": 5360, |
|
"task_loss": 1.0919628143310547 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4456626176834106, |
|
"epoch": 1.94, |
|
"learning_rate": 5.6444413294017266e-05, |
|
"loss": 1.204, |
|
"step": 5370, |
|
"task_loss": 0.9402436017990112 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0688556432724, |
|
"epoch": 1.94, |
|
"learning_rate": 5.641755850932434e-05, |
|
"loss": 1.0496, |
|
"step": 5380, |
|
"task_loss": 0.4863385856151581 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1923037767410278, |
|
"epoch": 1.95, |
|
"learning_rate": 5.639060912891412e-05, |
|
"loss": 1.3074, |
|
"step": 5390, |
|
"task_loss": 1.196105718612671 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5121135711669922, |
|
"epoch": 1.95, |
|
"learning_rate": 5.636356524928666e-05, |
|
"loss": 1.2095, |
|
"step": 5400, |
|
"task_loss": 1.1289417743682861 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0193203687667847, |
|
"epoch": 1.96, |
|
"learning_rate": 5.6336426967280403e-05, |
|
"loss": 1.0156, |
|
"step": 5410, |
|
"task_loss": 0.6196513175964355 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1136207580566406, |
|
"epoch": 1.96, |
|
"learning_rate": 5.6309194380071825e-05, |
|
"loss": 1.0743, |
|
"step": 5420, |
|
"task_loss": 0.920132577419281 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0087127685546875, |
|
"epoch": 1.96, |
|
"learning_rate": 5.6281867585175094e-05, |
|
"loss": 1.1059, |
|
"step": 5430, |
|
"task_loss": 0.8849406242370605 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9823505282402039, |
|
"epoch": 1.97, |
|
"learning_rate": 5.62544466804417e-05, |
|
"loss": 1.1327, |
|
"step": 5440, |
|
"task_loss": 1.1544190645217896 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.929497480392456, |
|
"epoch": 1.97, |
|
"learning_rate": 5.622693176406015e-05, |
|
"loss": 1.2371, |
|
"step": 5450, |
|
"task_loss": 0.6615207195281982 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8319283723831177, |
|
"epoch": 1.97, |
|
"learning_rate": 5.619932293455555e-05, |
|
"loss": 1.1588, |
|
"step": 5460, |
|
"task_loss": 0.6978183388710022 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0234538316726685, |
|
"epoch": 1.98, |
|
"learning_rate": 5.617162029078931e-05, |
|
"loss": 1.2865, |
|
"step": 5470, |
|
"task_loss": 1.0632944107055664 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2524006366729736, |
|
"epoch": 1.98, |
|
"learning_rate": 5.614382393195878e-05, |
|
"loss": 1.1138, |
|
"step": 5480, |
|
"task_loss": 0.9473165273666382 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9574975967407227, |
|
"epoch": 1.98, |
|
"learning_rate": 5.611593395759687e-05, |
|
"loss": 1.1326, |
|
"step": 5490, |
|
"task_loss": 0.6084072589874268 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1083191633224487, |
|
"epoch": 1.99, |
|
"learning_rate": 5.6087950467571686e-05, |
|
"loss": 1.0398, |
|
"step": 5500, |
|
"task_loss": 1.5072331428527832 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_exact_match": 79.93377483443709, |
|
"eval_f1": 87.66760484249721, |
|
"step": 5500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1833401918411255, |
|
"epoch": 1.99, |
|
"learning_rate": 5.605987356208624e-05, |
|
"loss": 1.1425, |
|
"step": 5510, |
|
"task_loss": 0.9314687252044678 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9431557059288025, |
|
"epoch": 1.99, |
|
"learning_rate": 5.603170334167802e-05, |
|
"loss": 1.1334, |
|
"step": 5520, |
|
"task_loss": 0.8197354078292847 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1592495441436768, |
|
"epoch": 2.0, |
|
"learning_rate": 5.6003439907218656e-05, |
|
"loss": 1.2719, |
|
"step": 5530, |
|
"task_loss": 1.1633968353271484 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9738726615905762, |
|
"epoch": 2.0, |
|
"learning_rate": 5.597508335991354e-05, |
|
"loss": 1.0092, |
|
"step": 5540, |
|
"task_loss": 0.6517045497894287 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9099946022033691, |
|
"epoch": 2.01, |
|
"learning_rate": 5.594663380130153e-05, |
|
"loss": 0.9207, |
|
"step": 5550, |
|
"task_loss": 0.8611887693405151 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8894250392913818, |
|
"epoch": 2.01, |
|
"learning_rate": 5.591809133325448e-05, |
|
"loss": 0.8773, |
|
"step": 5560, |
|
"task_loss": 0.7208254933357239 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1827757358551025, |
|
"epoch": 2.01, |
|
"learning_rate": 5.588945605797698e-05, |
|
"loss": 0.8743, |
|
"step": 5570, |
|
"task_loss": 1.253017783164978 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9436140060424805, |
|
"epoch": 2.02, |
|
"learning_rate": 5.5860728078005916e-05, |
|
"loss": 0.983, |
|
"step": 5580, |
|
"task_loss": 0.6787883639335632 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3159337043762207, |
|
"epoch": 2.02, |
|
"learning_rate": 5.583190749621014e-05, |
|
"loss": 0.9276, |
|
"step": 5590, |
|
"task_loss": 1.879063367843628 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7113268375396729, |
|
"epoch": 2.02, |
|
"learning_rate": 5.580299441579008e-05, |
|
"loss": 0.8873, |
|
"step": 5600, |
|
"task_loss": 1.2208278179168701 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8449651002883911, |
|
"epoch": 2.03, |
|
"learning_rate": 5.5773988940277416e-05, |
|
"loss": 0.8183, |
|
"step": 5610, |
|
"task_loss": 0.7712351083755493 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.184571385383606, |
|
"epoch": 2.03, |
|
"learning_rate": 5.574489117353463e-05, |
|
"loss": 0.8997, |
|
"step": 5620, |
|
"task_loss": 1.2251086235046387 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.027880311012268, |
|
"epoch": 2.03, |
|
"learning_rate": 5.571570121975472e-05, |
|
"loss": 0.8811, |
|
"step": 5630, |
|
"task_loss": 1.2804369926452637 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5541472434997559, |
|
"epoch": 2.04, |
|
"learning_rate": 5.568641918346074e-05, |
|
"loss": 1.0087, |
|
"step": 5640, |
|
"task_loss": 1.7716798782348633 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9887833595275879, |
|
"epoch": 2.04, |
|
"learning_rate": 5.565704516950552e-05, |
|
"loss": 0.9056, |
|
"step": 5650, |
|
"task_loss": 1.108952283859253 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8718394041061401, |
|
"epoch": 2.05, |
|
"learning_rate": 5.562757928307121e-05, |
|
"loss": 0.8578, |
|
"step": 5660, |
|
"task_loss": 0.7642139196395874 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.6347434520721436, |
|
"epoch": 2.05, |
|
"learning_rate": 5.559802162966897e-05, |
|
"loss": 0.9785, |
|
"step": 5670, |
|
"task_loss": 1.5460071563720703 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.013885736465454, |
|
"epoch": 2.05, |
|
"learning_rate": 5.556837231513852e-05, |
|
"loss": 0.9215, |
|
"step": 5680, |
|
"task_loss": 1.1903676986694336 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8532087802886963, |
|
"epoch": 2.06, |
|
"learning_rate": 5.553863144564781e-05, |
|
"loss": 0.9352, |
|
"step": 5690, |
|
"task_loss": 1.0140926837921143 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6742889881134033, |
|
"epoch": 2.06, |
|
"learning_rate": 5.550879912769264e-05, |
|
"loss": 0.9399, |
|
"step": 5700, |
|
"task_loss": 0.7566931843757629 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.184865117073059, |
|
"epoch": 2.06, |
|
"learning_rate": 5.5478875468096265e-05, |
|
"loss": 0.9145, |
|
"step": 5710, |
|
"task_loss": 1.0496134757995605 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8306396007537842, |
|
"epoch": 2.07, |
|
"learning_rate": 5.5448860574009015e-05, |
|
"loss": 0.8817, |
|
"step": 5720, |
|
"task_loss": 0.7623034715652466 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2012348175048828, |
|
"epoch": 2.07, |
|
"learning_rate": 5.5418754552907905e-05, |
|
"loss": 0.9289, |
|
"step": 5730, |
|
"task_loss": 1.1866190433502197 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.015893578529358, |
|
"epoch": 2.07, |
|
"learning_rate": 5.5388557512596255e-05, |
|
"loss": 0.8678, |
|
"step": 5740, |
|
"task_loss": 0.9989817142486572 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8217979669570923, |
|
"epoch": 2.08, |
|
"learning_rate": 5.535826956120332e-05, |
|
"loss": 0.9298, |
|
"step": 5750, |
|
"task_loss": 1.2064638137817383 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_exact_match": 79.97161778618732, |
|
"eval_f1": 88.04204326637895, |
|
"step": 5750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9215903878211975, |
|
"epoch": 2.08, |
|
"learning_rate": 5.532789080718388e-05, |
|
"loss": 0.9191, |
|
"step": 5760, |
|
"task_loss": 1.0276141166687012 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.038353681564331, |
|
"epoch": 2.09, |
|
"learning_rate": 5.5297421359317855e-05, |
|
"loss": 0.8936, |
|
"step": 5770, |
|
"task_loss": 1.220682144165039 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6983292102813721, |
|
"epoch": 2.09, |
|
"learning_rate": 5.5266861326709916e-05, |
|
"loss": 0.93, |
|
"step": 5780, |
|
"task_loss": 1.0011334419250488 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.005040168762207, |
|
"epoch": 2.09, |
|
"learning_rate": 5.5236210818789134e-05, |
|
"loss": 0.9048, |
|
"step": 5790, |
|
"task_loss": 0.8836946487426758 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.034933090209961, |
|
"epoch": 2.1, |
|
"learning_rate": 5.52054699453085e-05, |
|
"loss": 0.8101, |
|
"step": 5800, |
|
"task_loss": 0.8946719765663147 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0742028951644897, |
|
"epoch": 2.1, |
|
"learning_rate": 5.5174638816344634e-05, |
|
"loss": 1.0352, |
|
"step": 5810, |
|
"task_loss": 0.7151690721511841 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9790550470352173, |
|
"epoch": 2.1, |
|
"learning_rate": 5.514371754229731e-05, |
|
"loss": 0.8449, |
|
"step": 5820, |
|
"task_loss": 0.990552544593811 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0028564929962158, |
|
"epoch": 2.11, |
|
"learning_rate": 5.5112706233889095e-05, |
|
"loss": 0.8105, |
|
"step": 5830, |
|
"task_loss": 0.9065827131271362 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9125075340270996, |
|
"epoch": 2.11, |
|
"learning_rate": 5.508160500216497e-05, |
|
"loss": 0.7907, |
|
"step": 5840, |
|
"task_loss": 0.7284402251243591 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.029099464416504, |
|
"epoch": 2.11, |
|
"learning_rate": 5.50504139584919e-05, |
|
"loss": 1.1059, |
|
"step": 5850, |
|
"task_loss": 0.6841808557510376 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.064493179321289, |
|
"epoch": 2.12, |
|
"learning_rate": 5.5019133214558446e-05, |
|
"loss": 1.0304, |
|
"step": 5860, |
|
"task_loss": 1.0819746255874634 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7983106374740601, |
|
"epoch": 2.12, |
|
"learning_rate": 5.4987762882374365e-05, |
|
"loss": 0.8406, |
|
"step": 5870, |
|
"task_loss": 0.940750002861023 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7291630506515503, |
|
"epoch": 2.13, |
|
"learning_rate": 5.495630307427021e-05, |
|
"loss": 0.8193, |
|
"step": 5880, |
|
"task_loss": 0.7505497932434082 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1587085723876953, |
|
"epoch": 2.13, |
|
"learning_rate": 5.492475390289695e-05, |
|
"loss": 0.9177, |
|
"step": 5890, |
|
"task_loss": 1.006648302078247 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9014360904693604, |
|
"epoch": 2.13, |
|
"learning_rate": 5.489311548122551e-05, |
|
"loss": 0.9625, |
|
"step": 5900, |
|
"task_loss": 0.8569836616516113 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8141177892684937, |
|
"epoch": 2.14, |
|
"learning_rate": 5.486138792254644e-05, |
|
"loss": 0.9307, |
|
"step": 5910, |
|
"task_loss": 0.8875386714935303 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6669865250587463, |
|
"epoch": 2.14, |
|
"learning_rate": 5.482957134046943e-05, |
|
"loss": 0.9236, |
|
"step": 5920, |
|
"task_loss": 1.001587152481079 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8241477012634277, |
|
"epoch": 2.14, |
|
"learning_rate": 5.479766584892297e-05, |
|
"loss": 0.8674, |
|
"step": 5930, |
|
"task_loss": 1.012243628501892 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.938452959060669, |
|
"epoch": 2.15, |
|
"learning_rate": 5.476567156215392e-05, |
|
"loss": 0.9174, |
|
"step": 5940, |
|
"task_loss": 0.5267655253410339 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8367009162902832, |
|
"epoch": 2.15, |
|
"learning_rate": 5.4733588594727075e-05, |
|
"loss": 0.8657, |
|
"step": 5950, |
|
"task_loss": 0.5014982223510742 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9218536615371704, |
|
"epoch": 2.15, |
|
"learning_rate": 5.470141706152479e-05, |
|
"loss": 0.9756, |
|
"step": 5960, |
|
"task_loss": 1.0196517705917358 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5319288372993469, |
|
"epoch": 2.16, |
|
"learning_rate": 5.466915707774656e-05, |
|
"loss": 0.8654, |
|
"step": 5970, |
|
"task_loss": 0.6182997226715088 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7957490682601929, |
|
"epoch": 2.16, |
|
"learning_rate": 5.463680875890861e-05, |
|
"loss": 0.8933, |
|
"step": 5980, |
|
"task_loss": 0.7909960746765137 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5595934391021729, |
|
"epoch": 2.16, |
|
"learning_rate": 5.460437222084344e-05, |
|
"loss": 0.9095, |
|
"step": 5990, |
|
"task_loss": 0.6065921783447266 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3127511739730835, |
|
"epoch": 2.17, |
|
"learning_rate": 5.4571847579699476e-05, |
|
"loss": 0.9389, |
|
"step": 6000, |
|
"task_loss": 1.0244468450546265 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 80.4162724692526, |
|
"eval_f1": 88.26245305245004, |
|
"step": 6000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9304770231246948, |
|
"epoch": 2.17, |
|
"learning_rate": 5.453923495194062e-05, |
|
"loss": 0.8205, |
|
"step": 6010, |
|
"task_loss": 0.7821296453475952 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4573397636413574, |
|
"epoch": 2.18, |
|
"learning_rate": 5.4506534454345814e-05, |
|
"loss": 0.9258, |
|
"step": 6020, |
|
"task_loss": 1.1528065204620361 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7485623359680176, |
|
"epoch": 2.18, |
|
"learning_rate": 5.4473746204008664e-05, |
|
"loss": 1.008, |
|
"step": 6030, |
|
"task_loss": 0.8168891668319702 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9035183191299438, |
|
"epoch": 2.18, |
|
"learning_rate": 5.444087031833699e-05, |
|
"loss": 0.8543, |
|
"step": 6040, |
|
"task_loss": 1.0606868267059326 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1698881387710571, |
|
"epoch": 2.19, |
|
"learning_rate": 5.4407906915052425e-05, |
|
"loss": 0.8646, |
|
"step": 6050, |
|
"task_loss": 1.4439537525177002 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8415951132774353, |
|
"epoch": 2.19, |
|
"learning_rate": 5.437485611218999e-05, |
|
"loss": 0.8619, |
|
"step": 6060, |
|
"task_loss": 0.7370737791061401 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1627850532531738, |
|
"epoch": 2.19, |
|
"learning_rate": 5.434171802809764e-05, |
|
"loss": 0.9036, |
|
"step": 6070, |
|
"task_loss": 0.8056766390800476 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7362239956855774, |
|
"epoch": 2.2, |
|
"learning_rate": 5.430849278143587e-05, |
|
"loss": 0.8756, |
|
"step": 6080, |
|
"task_loss": 1.0063245296478271 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8030550479888916, |
|
"epoch": 2.2, |
|
"learning_rate": 5.427518049117732e-05, |
|
"loss": 0.9322, |
|
"step": 6090, |
|
"task_loss": 0.8871850967407227 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0586618185043335, |
|
"epoch": 2.2, |
|
"learning_rate": 5.4241781276606274e-05, |
|
"loss": 0.8834, |
|
"step": 6100, |
|
"task_loss": 1.0324198007583618 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6400578022003174, |
|
"epoch": 2.21, |
|
"learning_rate": 5.4208295257318296e-05, |
|
"loss": 0.8739, |
|
"step": 6110, |
|
"task_loss": 0.48888489603996277 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6222153306007385, |
|
"epoch": 2.21, |
|
"learning_rate": 5.417472255321977e-05, |
|
"loss": 0.8171, |
|
"step": 6120, |
|
"task_loss": 0.6630902290344238 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6763343811035156, |
|
"epoch": 2.22, |
|
"learning_rate": 5.414106328452749e-05, |
|
"loss": 0.9668, |
|
"step": 6130, |
|
"task_loss": 0.6360142827033997 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9001832008361816, |
|
"epoch": 2.22, |
|
"learning_rate": 5.4107317571768215e-05, |
|
"loss": 0.9618, |
|
"step": 6140, |
|
"task_loss": 0.7632064819335938 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.07004714012146, |
|
"epoch": 2.22, |
|
"learning_rate": 5.407348553577823e-05, |
|
"loss": 0.8878, |
|
"step": 6150, |
|
"task_loss": 1.4211241006851196 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8392785787582397, |
|
"epoch": 2.23, |
|
"learning_rate": 5.403956729770294e-05, |
|
"loss": 0.9421, |
|
"step": 6160, |
|
"task_loss": 0.6807509660720825 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8789479732513428, |
|
"epoch": 2.23, |
|
"learning_rate": 5.4005562978996424e-05, |
|
"loss": 0.9041, |
|
"step": 6170, |
|
"task_loss": 0.6013485193252563 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9375628232955933, |
|
"epoch": 2.23, |
|
"learning_rate": 5.3971472701420984e-05, |
|
"loss": 1.005, |
|
"step": 6180, |
|
"task_loss": 0.9426907300949097 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8983448147773743, |
|
"epoch": 2.24, |
|
"learning_rate": 5.393729658704673e-05, |
|
"loss": 0.8028, |
|
"step": 6190, |
|
"task_loss": 0.7798272371292114 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8339853882789612, |
|
"epoch": 2.24, |
|
"learning_rate": 5.3903034758251136e-05, |
|
"loss": 0.8309, |
|
"step": 6200, |
|
"task_loss": 0.4220605194568634 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8184782266616821, |
|
"epoch": 2.24, |
|
"learning_rate": 5.386868733771861e-05, |
|
"loss": 0.8682, |
|
"step": 6210, |
|
"task_loss": 0.509628415107727 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.024518609046936, |
|
"epoch": 2.25, |
|
"learning_rate": 5.383425444844001e-05, |
|
"loss": 0.8848, |
|
"step": 6220, |
|
"task_loss": 1.1755342483520508 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6047682166099548, |
|
"epoch": 2.25, |
|
"learning_rate": 5.379973621371229e-05, |
|
"loss": 0.8786, |
|
"step": 6230, |
|
"task_loss": 0.5626362562179565 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2282063961029053, |
|
"epoch": 2.26, |
|
"learning_rate": 5.3765132757137976e-05, |
|
"loss": 0.9194, |
|
"step": 6240, |
|
"task_loss": 0.9038418531417847 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0795953273773193, |
|
"epoch": 2.26, |
|
"learning_rate": 5.373044420262477e-05, |
|
"loss": 0.8973, |
|
"step": 6250, |
|
"task_loss": 1.1270602941513062 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_exact_match": 80.32166508987702, |
|
"eval_f1": 88.11121062075824, |
|
"step": 6250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9112297296524048, |
|
"epoch": 2.26, |
|
"learning_rate": 5.369567067438508e-05, |
|
"loss": 0.8557, |
|
"step": 6260, |
|
"task_loss": 1.6011927127838135 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8365263938903809, |
|
"epoch": 2.27, |
|
"learning_rate": 5.3660812296935584e-05, |
|
"loss": 0.8108, |
|
"step": 6270, |
|
"task_loss": 0.6560771465301514 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1342790126800537, |
|
"epoch": 2.27, |
|
"learning_rate": 5.362586919509681e-05, |
|
"loss": 0.9093, |
|
"step": 6280, |
|
"task_loss": 1.2042913436889648 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9722185134887695, |
|
"epoch": 2.27, |
|
"learning_rate": 5.359084149399264e-05, |
|
"loss": 0.8678, |
|
"step": 6290, |
|
"task_loss": 0.9090611934661865 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0596710443496704, |
|
"epoch": 2.28, |
|
"learning_rate": 5.355572931904991e-05, |
|
"loss": 0.9807, |
|
"step": 6300, |
|
"task_loss": 1.3353559970855713 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.045316457748413, |
|
"epoch": 2.28, |
|
"learning_rate": 5.3520532795997916e-05, |
|
"loss": 0.8632, |
|
"step": 6310, |
|
"task_loss": 0.6300460696220398 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8498081564903259, |
|
"epoch": 2.28, |
|
"learning_rate": 5.348525205086801e-05, |
|
"loss": 0.9675, |
|
"step": 6320, |
|
"task_loss": 0.6726857423782349 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9325256943702698, |
|
"epoch": 2.29, |
|
"learning_rate": 5.344988720999312e-05, |
|
"loss": 0.9498, |
|
"step": 6330, |
|
"task_loss": 0.6439615488052368 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8513062000274658, |
|
"epoch": 2.29, |
|
"learning_rate": 5.3414438400007276e-05, |
|
"loss": 0.8753, |
|
"step": 6340, |
|
"task_loss": 0.7384096384048462 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8684785962104797, |
|
"epoch": 2.29, |
|
"learning_rate": 5.337890574784523e-05, |
|
"loss": 0.8852, |
|
"step": 6350, |
|
"task_loss": 0.7421172261238098 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8197358846664429, |
|
"epoch": 2.3, |
|
"learning_rate": 5.334328938074193e-05, |
|
"loss": 0.8828, |
|
"step": 6360, |
|
"task_loss": 0.8697389364242554 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9179954528808594, |
|
"epoch": 2.3, |
|
"learning_rate": 5.3307589426232076e-05, |
|
"loss": 0.8377, |
|
"step": 6370, |
|
"task_loss": 1.053146481513977 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9556145071983337, |
|
"epoch": 2.31, |
|
"learning_rate": 5.327180601214971e-05, |
|
"loss": 0.8407, |
|
"step": 6380, |
|
"task_loss": 0.9011473655700684 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7499949932098389, |
|
"epoch": 2.31, |
|
"learning_rate": 5.323593926662768e-05, |
|
"loss": 0.9696, |
|
"step": 6390, |
|
"task_loss": 0.6749441623687744 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.013110876083374, |
|
"epoch": 2.31, |
|
"learning_rate": 5.319998931809728e-05, |
|
"loss": 0.9384, |
|
"step": 6400, |
|
"task_loss": 0.854491651058197 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0834851264953613, |
|
"epoch": 2.32, |
|
"learning_rate": 5.316395629528771e-05, |
|
"loss": 0.7752, |
|
"step": 6410, |
|
"task_loss": 1.330308198928833 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.5485963821411133, |
|
"epoch": 2.32, |
|
"learning_rate": 5.312784032722561e-05, |
|
"loss": 0.9658, |
|
"step": 6420, |
|
"task_loss": 1.17985200881958 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8666309118270874, |
|
"epoch": 2.32, |
|
"learning_rate": 5.309164154323469e-05, |
|
"loss": 0.9012, |
|
"step": 6430, |
|
"task_loss": 0.88444983959198 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6017141938209534, |
|
"epoch": 2.33, |
|
"learning_rate": 5.305536007293517e-05, |
|
"loss": 0.9155, |
|
"step": 6440, |
|
"task_loss": 0.6628368496894836 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7634067535400391, |
|
"epoch": 2.33, |
|
"learning_rate": 5.301899604624336e-05, |
|
"loss": 0.8433, |
|
"step": 6450, |
|
"task_loss": 0.7978712320327759 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9220677614212036, |
|
"epoch": 2.33, |
|
"learning_rate": 5.298254959337119e-05, |
|
"loss": 0.8675, |
|
"step": 6460, |
|
"task_loss": 1.0927445888519287 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6314671039581299, |
|
"epoch": 2.34, |
|
"learning_rate": 5.294602084482573e-05, |
|
"loss": 0.8681, |
|
"step": 6470, |
|
"task_loss": 0.41532278060913086 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7226696014404297, |
|
"epoch": 2.34, |
|
"learning_rate": 5.2909409931408735e-05, |
|
"loss": 0.9045, |
|
"step": 6480, |
|
"task_loss": 0.8908495306968689 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8101774454116821, |
|
"epoch": 2.35, |
|
"learning_rate": 5.28727169842162e-05, |
|
"loss": 0.9288, |
|
"step": 6490, |
|
"task_loss": 0.5963428020477295 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7302408814430237, |
|
"epoch": 2.35, |
|
"learning_rate": 5.283594213463783e-05, |
|
"loss": 0.7634, |
|
"step": 6500, |
|
"task_loss": 0.6015718579292297 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_exact_match": 80.53926206244087, |
|
"eval_f1": 88.24102075665736, |
|
"step": 6500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8358856439590454, |
|
"epoch": 2.35, |
|
"learning_rate": 5.279908551435662e-05, |
|
"loss": 0.8888, |
|
"step": 6510, |
|
"task_loss": 0.5410736799240112 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7577072381973267, |
|
"epoch": 2.36, |
|
"learning_rate": 5.276214725534839e-05, |
|
"loss": 0.9435, |
|
"step": 6520, |
|
"task_loss": 0.9352724552154541 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6489107012748718, |
|
"epoch": 2.36, |
|
"learning_rate": 5.272512748988126e-05, |
|
"loss": 0.8791, |
|
"step": 6530, |
|
"task_loss": 0.7899311780929565 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6359513998031616, |
|
"epoch": 2.36, |
|
"learning_rate": 5.268802635051522e-05, |
|
"loss": 0.7681, |
|
"step": 6540, |
|
"task_loss": 0.3422088027000427 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0363128185272217, |
|
"epoch": 2.37, |
|
"learning_rate": 5.265084397010164e-05, |
|
"loss": 0.9111, |
|
"step": 6550, |
|
"task_loss": 1.0027375221252441 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0372557640075684, |
|
"epoch": 2.37, |
|
"learning_rate": 5.26135804817828e-05, |
|
"loss": 0.8456, |
|
"step": 6560, |
|
"task_loss": 1.0056697130203247 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9141188263893127, |
|
"epoch": 2.37, |
|
"learning_rate": 5.257623601899141e-05, |
|
"loss": 0.8281, |
|
"step": 6570, |
|
"task_loss": 1.3693073987960815 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7791051864624023, |
|
"epoch": 2.38, |
|
"learning_rate": 5.253881071545013e-05, |
|
"loss": 0.8698, |
|
"step": 6580, |
|
"task_loss": 0.7371783256530762 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7637522220611572, |
|
"epoch": 2.38, |
|
"learning_rate": 5.25013047051711e-05, |
|
"loss": 0.8717, |
|
"step": 6590, |
|
"task_loss": 0.41452422738075256 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6556802988052368, |
|
"epoch": 2.39, |
|
"learning_rate": 5.2463718122455444e-05, |
|
"loss": 0.8335, |
|
"step": 6600, |
|
"task_loss": 0.6950621604919434 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9156330823898315, |
|
"epoch": 2.39, |
|
"learning_rate": 5.242605110189281e-05, |
|
"loss": 0.9741, |
|
"step": 6610, |
|
"task_loss": 0.9320003986358643 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6790286302566528, |
|
"epoch": 2.39, |
|
"learning_rate": 5.2388303778360865e-05, |
|
"loss": 0.8083, |
|
"step": 6620, |
|
"task_loss": 0.5760271549224854 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9703154563903809, |
|
"epoch": 2.4, |
|
"learning_rate": 5.235047628702483e-05, |
|
"loss": 0.9415, |
|
"step": 6630, |
|
"task_loss": 0.8003017902374268 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7981125712394714, |
|
"epoch": 2.4, |
|
"learning_rate": 5.2312568763336995e-05, |
|
"loss": 1.0161, |
|
"step": 6640, |
|
"task_loss": 0.6487212181091309 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8091559410095215, |
|
"epoch": 2.4, |
|
"learning_rate": 5.227458134303623e-05, |
|
"loss": 0.8289, |
|
"step": 6650, |
|
"task_loss": 1.0017086267471313 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7591532468795776, |
|
"epoch": 2.41, |
|
"learning_rate": 5.223651416214749e-05, |
|
"loss": 0.8141, |
|
"step": 6660, |
|
"task_loss": 0.4736844599246979 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.658312201499939, |
|
"epoch": 2.41, |
|
"learning_rate": 5.2198367356981325e-05, |
|
"loss": 0.8889, |
|
"step": 6670, |
|
"task_loss": 0.6780068278312683 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9467959403991699, |
|
"epoch": 2.41, |
|
"learning_rate": 5.2160141064133426e-05, |
|
"loss": 0.8872, |
|
"step": 6680, |
|
"task_loss": 1.043292760848999 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7465643882751465, |
|
"epoch": 2.42, |
|
"learning_rate": 5.2125669551728134e-05, |
|
"loss": 0.7908, |
|
"step": 6690, |
|
"task_loss": 0.5276561975479126 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8351773023605347, |
|
"epoch": 2.42, |
|
"learning_rate": 5.208729260962506e-05, |
|
"loss": 0.9061, |
|
"step": 6700, |
|
"task_loss": 0.9505938291549683 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.628624439239502, |
|
"epoch": 2.43, |
|
"learning_rate": 5.204883657757553e-05, |
|
"loss": 0.879, |
|
"step": 6710, |
|
"task_loss": 0.8255483508110046 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6216802000999451, |
|
"epoch": 2.43, |
|
"learning_rate": 5.201030159328252e-05, |
|
"loss": 0.8161, |
|
"step": 6720, |
|
"task_loss": 0.7303711175918579 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.187577247619629, |
|
"epoch": 2.43, |
|
"learning_rate": 5.1971687794731685e-05, |
|
"loss": 0.8769, |
|
"step": 6730, |
|
"task_loss": 0.9617449045181274 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.022446870803833, |
|
"epoch": 2.44, |
|
"learning_rate": 5.193299532019094e-05, |
|
"loss": 0.9059, |
|
"step": 6740, |
|
"task_loss": 1.0978424549102783 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7576553821563721, |
|
"epoch": 2.44, |
|
"learning_rate": 5.189422430820986e-05, |
|
"loss": 0.9146, |
|
"step": 6750, |
|
"task_loss": 0.9070881605148315 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_exact_match": 80.6244087038789, |
|
"eval_f1": 88.24355008399391, |
|
"step": 6750 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3398449420928955, |
|
"epoch": 2.44, |
|
"learning_rate": 5.185537489761931e-05, |
|
"loss": 1.0719, |
|
"step": 6760, |
|
"task_loss": 1.251268744468689 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8856955766677856, |
|
"epoch": 2.45, |
|
"learning_rate": 5.181644722753083e-05, |
|
"loss": 0.8418, |
|
"step": 6770, |
|
"task_loss": 0.7368022799491882 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0002448558807373, |
|
"epoch": 2.45, |
|
"learning_rate": 5.177744143733622e-05, |
|
"loss": 0.8679, |
|
"step": 6780, |
|
"task_loss": 1.0230883359909058 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0123604536056519, |
|
"epoch": 2.45, |
|
"learning_rate": 5.173835766670701e-05, |
|
"loss": 0.9899, |
|
"step": 6790, |
|
"task_loss": 1.1285487413406372 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8969544172286987, |
|
"epoch": 2.46, |
|
"learning_rate": 5.1699196055593954e-05, |
|
"loss": 0.9321, |
|
"step": 6800, |
|
"task_loss": 0.9808671474456787 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.399101734161377, |
|
"epoch": 2.46, |
|
"learning_rate": 5.165995674422654e-05, |
|
"loss": 1.0175, |
|
"step": 6810, |
|
"task_loss": 0.9696444272994995 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7293881177902222, |
|
"epoch": 2.46, |
|
"learning_rate": 5.162063987311249e-05, |
|
"loss": 0.9052, |
|
"step": 6820, |
|
"task_loss": 0.8145143389701843 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7391337156295776, |
|
"epoch": 2.47, |
|
"learning_rate": 5.158124558303723e-05, |
|
"loss": 0.7945, |
|
"step": 6830, |
|
"task_loss": 0.7841509580612183 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6791837215423584, |
|
"epoch": 2.47, |
|
"learning_rate": 5.1541774015063435e-05, |
|
"loss": 0.8292, |
|
"step": 6840, |
|
"task_loss": 0.6428923606872559 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.803602397441864, |
|
"epoch": 2.48, |
|
"learning_rate": 5.150222531053048e-05, |
|
"loss": 0.8626, |
|
"step": 6850, |
|
"task_loss": 0.8221435546875 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7116631269454956, |
|
"epoch": 2.48, |
|
"learning_rate": 5.146259961105396e-05, |
|
"loss": 0.9257, |
|
"step": 6860, |
|
"task_loss": 0.5078368186950684 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9278137683868408, |
|
"epoch": 2.48, |
|
"learning_rate": 5.142289705852514e-05, |
|
"loss": 0.9753, |
|
"step": 6870, |
|
"task_loss": 1.4697847366333008 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.93769770860672, |
|
"epoch": 2.49, |
|
"learning_rate": 5.138311779511054e-05, |
|
"loss": 1.012, |
|
"step": 6880, |
|
"task_loss": 1.0208895206451416 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5001537799835205, |
|
"epoch": 2.49, |
|
"learning_rate": 5.134326196325131e-05, |
|
"loss": 0.8302, |
|
"step": 6890, |
|
"task_loss": 0.9387691020965576 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.733636736869812, |
|
"epoch": 2.49, |
|
"learning_rate": 5.130332970566278e-05, |
|
"loss": 0.822, |
|
"step": 6900, |
|
"task_loss": 0.7175207734107971 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.924873948097229, |
|
"epoch": 2.5, |
|
"learning_rate": 5.1263321165334e-05, |
|
"loss": 1.0059, |
|
"step": 6910, |
|
"task_loss": 0.8183651566505432 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9852616190910339, |
|
"epoch": 2.5, |
|
"learning_rate": 5.122323648552711e-05, |
|
"loss": 0.9519, |
|
"step": 6920, |
|
"task_loss": 1.253139853477478 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8422321081161499, |
|
"epoch": 2.5, |
|
"learning_rate": 5.11830758097769e-05, |
|
"loss": 0.771, |
|
"step": 6930, |
|
"task_loss": 0.6653087139129639 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.179046392440796, |
|
"epoch": 2.51, |
|
"learning_rate": 5.114283928189032e-05, |
|
"loss": 1.021, |
|
"step": 6940, |
|
"task_loss": 0.617653489112854 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5847468376159668, |
|
"epoch": 2.51, |
|
"learning_rate": 5.110252704594591e-05, |
|
"loss": 0.8193, |
|
"step": 6950, |
|
"task_loss": 0.5160527229309082 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0966300964355469, |
|
"epoch": 2.52, |
|
"learning_rate": 5.106213924629328e-05, |
|
"loss": 1.0079, |
|
"step": 6960, |
|
"task_loss": 0.8059948682785034 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7070465087890625, |
|
"epoch": 2.52, |
|
"learning_rate": 5.102167602755267e-05, |
|
"loss": 0.9058, |
|
"step": 6970, |
|
"task_loss": 0.5223201513290405 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9722675681114197, |
|
"epoch": 2.52, |
|
"learning_rate": 5.0981137534614325e-05, |
|
"loss": 0.8457, |
|
"step": 6980, |
|
"task_loss": 0.7618287801742554 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8757837414741516, |
|
"epoch": 2.53, |
|
"learning_rate": 5.094052391263807e-05, |
|
"loss": 0.8939, |
|
"step": 6990, |
|
"task_loss": 0.8423348665237427 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3165154457092285, |
|
"epoch": 2.53, |
|
"learning_rate": 5.089983530705272e-05, |
|
"loss": 0.8859, |
|
"step": 7000, |
|
"task_loss": 0.809664785861969 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_exact_match": 80.70955534531693, |
|
"eval_f1": 88.51734461953797, |
|
"step": 7000 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7622497081756592, |
|
"epoch": 2.53, |
|
"learning_rate": 5.085907186355564e-05, |
|
"loss": 0.8179, |
|
"step": 7010, |
|
"task_loss": 0.8336362242698669 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6575897932052612, |
|
"epoch": 2.54, |
|
"learning_rate": 5.081823372811212e-05, |
|
"loss": 0.894, |
|
"step": 7020, |
|
"task_loss": 0.7306101322174072 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7808577418327332, |
|
"epoch": 2.54, |
|
"learning_rate": 5.0777321046954936e-05, |
|
"loss": 0.8247, |
|
"step": 7030, |
|
"task_loss": 0.9445192217826843 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7908771634101868, |
|
"epoch": 2.54, |
|
"learning_rate": 5.073633396658378e-05, |
|
"loss": 0.8793, |
|
"step": 7040, |
|
"task_loss": 1.0309138298034668 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.759590208530426, |
|
"epoch": 2.55, |
|
"learning_rate": 5.069527263376478e-05, |
|
"loss": 0.8812, |
|
"step": 7050, |
|
"task_loss": 0.7510941028594971 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9497214555740356, |
|
"epoch": 2.55, |
|
"learning_rate": 5.06541371955299e-05, |
|
"loss": 0.9211, |
|
"step": 7060, |
|
"task_loss": 1.1385433673858643 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8788970708847046, |
|
"epoch": 2.56, |
|
"learning_rate": 5.061292779917651e-05, |
|
"loss": 0.9177, |
|
"step": 7070, |
|
"task_loss": 1.2690625190734863 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5018882751464844, |
|
"epoch": 2.56, |
|
"learning_rate": 5.0571644592266784e-05, |
|
"loss": 0.898, |
|
"step": 7080, |
|
"task_loss": 0.3722462058067322 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0037760734558105, |
|
"epoch": 2.56, |
|
"learning_rate": 5.053028772262718e-05, |
|
"loss": 0.9261, |
|
"step": 7090, |
|
"task_loss": 0.9891495704650879 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2450032234191895, |
|
"epoch": 2.57, |
|
"learning_rate": 5.048885733834797e-05, |
|
"loss": 0.8834, |
|
"step": 7100, |
|
"task_loss": 1.3762762546539307 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.4217417240142822, |
|
"epoch": 2.57, |
|
"learning_rate": 5.044735358778261e-05, |
|
"loss": 0.8716, |
|
"step": 7110, |
|
"task_loss": 1.2648154497146606 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0889158248901367, |
|
"epoch": 2.57, |
|
"learning_rate": 5.040577661954731e-05, |
|
"loss": 0.9575, |
|
"step": 7120, |
|
"task_loss": 0.7835713624954224 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.707399845123291, |
|
"epoch": 2.58, |
|
"learning_rate": 5.0364126582520454e-05, |
|
"loss": 0.9368, |
|
"step": 7130, |
|
"task_loss": 0.6754993200302124 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8570347428321838, |
|
"epoch": 2.58, |
|
"learning_rate": 5.0322403625842056e-05, |
|
"loss": 0.8977, |
|
"step": 7140, |
|
"task_loss": 1.0160921812057495 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1405144929885864, |
|
"epoch": 2.58, |
|
"learning_rate": 5.028060789891323e-05, |
|
"loss": 0.9357, |
|
"step": 7150, |
|
"task_loss": 1.246722936630249 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8279078006744385, |
|
"epoch": 2.59, |
|
"learning_rate": 5.0238739551395684e-05, |
|
"loss": 0.8706, |
|
"step": 7160, |
|
"task_loss": 0.8700082302093506 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7681478261947632, |
|
"epoch": 2.59, |
|
"learning_rate": 5.019679873321117e-05, |
|
"loss": 0.874, |
|
"step": 7170, |
|
"task_loss": 0.9047025442123413 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7837231159210205, |
|
"epoch": 2.59, |
|
"learning_rate": 5.0154785594540905e-05, |
|
"loss": 0.7157, |
|
"step": 7180, |
|
"task_loss": 0.44350481033325195 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7090331315994263, |
|
"epoch": 2.6, |
|
"learning_rate": 5.0112700285825134e-05, |
|
"loss": 0.897, |
|
"step": 7190, |
|
"task_loss": 0.5929510593414307 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7562023401260376, |
|
"epoch": 2.6, |
|
"learning_rate": 5.007054295776246e-05, |
|
"loss": 0.8891, |
|
"step": 7200, |
|
"task_loss": 0.5599946975708008 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1266264915466309, |
|
"epoch": 2.61, |
|
"learning_rate": 5.002831376130942e-05, |
|
"loss": 1.002, |
|
"step": 7210, |
|
"task_loss": 1.0086379051208496 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6372247338294983, |
|
"epoch": 2.61, |
|
"learning_rate": 4.9986012847679855e-05, |
|
"loss": 0.8134, |
|
"step": 7220, |
|
"task_loss": 0.7226533889770508 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8293584585189819, |
|
"epoch": 2.61, |
|
"learning_rate": 4.9943640368344464e-05, |
|
"loss": 0.9293, |
|
"step": 7230, |
|
"task_loss": 1.0355051755905151 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0152957439422607, |
|
"epoch": 2.62, |
|
"learning_rate": 4.990119647503016e-05, |
|
"loss": 0.8569, |
|
"step": 7240, |
|
"task_loss": 1.2577180862426758 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9773650169372559, |
|
"epoch": 2.62, |
|
"learning_rate": 4.98586813197196e-05, |
|
"loss": 0.8754, |
|
"step": 7250, |
|
"task_loss": 1.1356110572814941 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_exact_match": 81.0879848628193, |
|
"eval_f1": 88.62796907187332, |
|
"step": 7250 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7708771824836731, |
|
"epoch": 2.62, |
|
"learning_rate": 4.9816095054650605e-05, |
|
"loss": 0.8444, |
|
"step": 7260, |
|
"task_loss": 0.6651926636695862 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1072356700897217, |
|
"epoch": 2.63, |
|
"learning_rate": 4.977343783231563e-05, |
|
"loss": 0.9954, |
|
"step": 7270, |
|
"task_loss": 1.019878625869751 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0349875688552856, |
|
"epoch": 2.63, |
|
"learning_rate": 4.9730709805461207e-05, |
|
"loss": 0.8893, |
|
"step": 7280, |
|
"task_loss": 0.8438126444816589 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8099746704101562, |
|
"epoch": 2.63, |
|
"learning_rate": 4.96879111270874e-05, |
|
"loss": 0.8308, |
|
"step": 7290, |
|
"task_loss": 0.702311635017395 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2747955322265625, |
|
"epoch": 2.64, |
|
"learning_rate": 4.964504195044729e-05, |
|
"loss": 0.9728, |
|
"step": 7300, |
|
"task_loss": 1.0143351554870605 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1044409275054932, |
|
"epoch": 2.64, |
|
"learning_rate": 4.960210242904637e-05, |
|
"loss": 0.9161, |
|
"step": 7310, |
|
"task_loss": 1.0179443359375 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9350137710571289, |
|
"epoch": 2.65, |
|
"learning_rate": 4.955909271664201e-05, |
|
"loss": 0.7646, |
|
"step": 7320, |
|
"task_loss": 0.8446428775787354 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.1761360168457031, |
|
"epoch": 2.65, |
|
"learning_rate": 4.951601296724296e-05, |
|
"loss": 0.9383, |
|
"step": 7330, |
|
"task_loss": 1.650334119796753 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2493460178375244, |
|
"epoch": 2.65, |
|
"learning_rate": 4.947286333510872e-05, |
|
"loss": 0.8523, |
|
"step": 7340, |
|
"task_loss": 1.3208677768707275 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.021695852279663, |
|
"epoch": 2.66, |
|
"learning_rate": 4.942964397474906e-05, |
|
"loss": 0.8172, |
|
"step": 7350, |
|
"task_loss": 0.7468795776367188 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8202105760574341, |
|
"epoch": 2.66, |
|
"learning_rate": 4.9386355040923396e-05, |
|
"loss": 0.7931, |
|
"step": 7360, |
|
"task_loss": 0.6085814833641052 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8396191000938416, |
|
"epoch": 2.66, |
|
"learning_rate": 4.934299668864031e-05, |
|
"loss": 0.9374, |
|
"step": 7370, |
|
"task_loss": 0.8447757959365845 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8239990472793579, |
|
"epoch": 2.67, |
|
"learning_rate": 4.929956907315692e-05, |
|
"loss": 0.9837, |
|
"step": 7380, |
|
"task_loss": 0.9020982980728149 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9127852916717529, |
|
"epoch": 2.67, |
|
"learning_rate": 4.9256072349978375e-05, |
|
"loss": 0.8494, |
|
"step": 7390, |
|
"task_loss": 0.6982762813568115 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0156875848770142, |
|
"epoch": 2.67, |
|
"learning_rate": 4.92125066748573e-05, |
|
"loss": 0.8612, |
|
"step": 7400, |
|
"task_loss": 0.7961791753768921 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.091951847076416, |
|
"epoch": 2.68, |
|
"learning_rate": 4.916887220379319e-05, |
|
"loss": 0.8624, |
|
"step": 7410, |
|
"task_loss": 0.8009432554244995 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8252087831497192, |
|
"epoch": 2.68, |
|
"learning_rate": 4.912516909303193e-05, |
|
"loss": 0.8018, |
|
"step": 7420, |
|
"task_loss": 0.7994389533996582 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0799195766448975, |
|
"epoch": 2.69, |
|
"learning_rate": 4.9081397499065144e-05, |
|
"loss": 0.8456, |
|
"step": 7430, |
|
"task_loss": 0.98964923620224 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9133539199829102, |
|
"epoch": 2.69, |
|
"learning_rate": 4.9037557578629696e-05, |
|
"loss": 0.7018, |
|
"step": 7440, |
|
"task_loss": 1.0649144649505615 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7813083529472351, |
|
"epoch": 2.69, |
|
"learning_rate": 4.8993649488707116e-05, |
|
"loss": 0.7919, |
|
"step": 7450, |
|
"task_loss": 0.7633917331695557 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.905107021331787, |
|
"epoch": 2.7, |
|
"learning_rate": 4.894967338652305e-05, |
|
"loss": 0.9709, |
|
"step": 7460, |
|
"task_loss": 1.3844743967056274 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8429162502288818, |
|
"epoch": 2.7, |
|
"learning_rate": 4.890562942954664e-05, |
|
"loss": 0.8252, |
|
"step": 7470, |
|
"task_loss": 0.8414809703826904 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7957346439361572, |
|
"epoch": 2.7, |
|
"learning_rate": 4.886151777549004e-05, |
|
"loss": 0.7663, |
|
"step": 7480, |
|
"task_loss": 0.5705951452255249 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7642992734909058, |
|
"epoch": 2.71, |
|
"learning_rate": 4.8817338582307804e-05, |
|
"loss": 0.9121, |
|
"step": 7490, |
|
"task_loss": 0.5456397533416748 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.651875376701355, |
|
"epoch": 2.71, |
|
"learning_rate": 4.877309200819631e-05, |
|
"loss": 0.8589, |
|
"step": 7500, |
|
"task_loss": 1.0916081666946411 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_exact_match": 81.42857142857143, |
|
"eval_f1": 88.55823715684251, |
|
"step": 7500 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.2253670692443848, |
|
"epoch": 2.71, |
|
"learning_rate": 4.872877821159325e-05, |
|
"loss": 0.8689, |
|
"step": 7510, |
|
"task_loss": 0.8776388764381409 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7492198944091797, |
|
"epoch": 2.72, |
|
"learning_rate": 4.868439735117698e-05, |
|
"loss": 0.907, |
|
"step": 7520, |
|
"task_loss": 0.6415897607803345 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6262823343276978, |
|
"epoch": 2.72, |
|
"learning_rate": 4.863994958586604e-05, |
|
"loss": 0.8402, |
|
"step": 7530, |
|
"task_loss": 0.598420262336731 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.678829550743103, |
|
"epoch": 2.72, |
|
"learning_rate": 4.85954350748185e-05, |
|
"loss": 0.8539, |
|
"step": 7540, |
|
"task_loss": 0.8512382507324219 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.43244946002960205, |
|
"epoch": 2.73, |
|
"learning_rate": 4.855085397743149e-05, |
|
"loss": 0.913, |
|
"step": 7550, |
|
"task_loss": 0.4512510895729065 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9347466230392456, |
|
"epoch": 2.73, |
|
"learning_rate": 4.850620645334051e-05, |
|
"loss": 1.0302, |
|
"step": 7560, |
|
"task_loss": 1.0103384256362915 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0060513019561768, |
|
"epoch": 2.74, |
|
"learning_rate": 4.8461492662418953e-05, |
|
"loss": 1.0036, |
|
"step": 7570, |
|
"task_loss": 1.0222327709197998 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8404515981674194, |
|
"epoch": 2.74, |
|
"learning_rate": 4.8416712764777496e-05, |
|
"loss": 0.9034, |
|
"step": 7580, |
|
"task_loss": 1.377656102180481 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6688359975814819, |
|
"epoch": 2.74, |
|
"learning_rate": 4.837186692076353e-05, |
|
"loss": 0.7879, |
|
"step": 7590, |
|
"task_loss": 0.7730237245559692 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0158637762069702, |
|
"epoch": 2.75, |
|
"learning_rate": 4.832695529096059e-05, |
|
"loss": 0.8966, |
|
"step": 7600, |
|
"task_loss": 0.9950785636901855 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8134995698928833, |
|
"epoch": 2.75, |
|
"learning_rate": 4.8281978036187764e-05, |
|
"loss": 0.9182, |
|
"step": 7610, |
|
"task_loss": 0.6545308828353882 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5976283550262451, |
|
"epoch": 2.75, |
|
"learning_rate": 4.823693531749912e-05, |
|
"loss": 0.8383, |
|
"step": 7620, |
|
"task_loss": 0.8224766254425049 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8086435794830322, |
|
"epoch": 2.76, |
|
"learning_rate": 4.8191827296183185e-05, |
|
"loss": 0.9204, |
|
"step": 7630, |
|
"task_loss": 0.6123383641242981 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.7645212411880493, |
|
"epoch": 2.76, |
|
"learning_rate": 4.814665413376226e-05, |
|
"loss": 0.783, |
|
"step": 7640, |
|
"task_loss": 0.8700419068336487 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.0566661357879639, |
|
"epoch": 2.76, |
|
"learning_rate": 4.8101415991991965e-05, |
|
"loss": 0.8458, |
|
"step": 7650, |
|
"task_loss": 1.4529619216918945 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.050924301147461, |
|
"epoch": 2.77, |
|
"learning_rate": 4.805611303286053e-05, |
|
"loss": 0.8807, |
|
"step": 7660, |
|
"task_loss": 1.085532784461975 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5178258419036865, |
|
"epoch": 2.77, |
|
"learning_rate": 4.801074541858835e-05, |
|
"loss": 0.803, |
|
"step": 7670, |
|
"task_loss": 0.5137467384338379 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5854527354240417, |
|
"epoch": 2.78, |
|
"learning_rate": 4.7965313311627286e-05, |
|
"loss": 0.8646, |
|
"step": 7680, |
|
"task_loss": 0.5222367644309998 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6712926030158997, |
|
"epoch": 2.78, |
|
"learning_rate": 4.7919816874660145e-05, |
|
"loss": 0.8015, |
|
"step": 7690, |
|
"task_loss": 0.6108640432357788 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.8817957639694214, |
|
"epoch": 2.78, |
|
"learning_rate": 4.787425627060011e-05, |
|
"loss": 0.8394, |
|
"step": 7700, |
|
"task_loss": 1.0321855545043945 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 1.3753621578216553, |
|
"epoch": 2.79, |
|
"learning_rate": 4.78286316625901e-05, |
|
"loss": 0.9047, |
|
"step": 7710, |
|
"task_loss": 1.332418441772461 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6856953501701355, |
|
"epoch": 2.79, |
|
"learning_rate": 4.778294321400225e-05, |
|
"loss": 0.8821, |
|
"step": 7720, |
|
"task_loss": 1.043306589126587 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.5687346458435059, |
|
"epoch": 2.79, |
|
"learning_rate": 4.773719108843727e-05, |
|
"loss": 0.8, |
|
"step": 7730, |
|
"task_loss": 0.7144980430603027 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.6648460626602173, |
|
"epoch": 2.8, |
|
"learning_rate": 4.769137544972389e-05, |
|
"loss": 0.7907, |
|
"step": 7740, |
|
"task_loss": 1.0056627988815308 |
|
}, |
|
{ |
|
"compression_loss": 0.0, |
|
"distillation_loss": 0.9545556306838989, |
|
"epoch": 2.8, |
|
"learning_rate": 4.764549646191827e-05, |
|
"loss": 0.8531, |
|
"step": 7750, |
|
"task_loss": 1.079555869102478 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_exact_match": 81.42857142857143, |
|
"eval_f1": 88.77005789642749, |
|
"step": 7750 |
|
} |
|
], |
|
"max_steps": 55340, |
|
"num_train_epochs": 20, |
|
"total_flos": 5914832349173760.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|