|
{ |
|
"best_metric": 0.9049755301794453, |
|
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-2180", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.959507465362549, |
|
"learning_rate": 0.004977064220183487, |
|
"loss": 1.9585, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.8976335525512695, |
|
"learning_rate": 0.004954128440366973, |
|
"loss": 1.3768, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.9822118282318115, |
|
"learning_rate": 0.004931192660550459, |
|
"loss": 1.4236, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.5010669231414795, |
|
"learning_rate": 0.004908256880733945, |
|
"loss": 1.2595, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.538541793823242, |
|
"learning_rate": 0.004887614678899083, |
|
"loss": 1.4165, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.172377586364746, |
|
"learning_rate": 0.004864678899082569, |
|
"loss": 1.1823, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.388909101486206, |
|
"learning_rate": 0.004841743119266055, |
|
"loss": 1.1399, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.9836621284484863, |
|
"learning_rate": 0.004818807339449541, |
|
"loss": 1.1484, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.9791626930236816, |
|
"learning_rate": 0.004795871559633028, |
|
"loss": 1.1652, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.8931291103363037, |
|
"learning_rate": 0.004772935779816514, |
|
"loss": 1.0614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.281805515289307, |
|
"learning_rate": 0.00475, |
|
"loss": 1.1816, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8973846435546875, |
|
"learning_rate": 0.0047270642201834865, |
|
"loss": 1.0441, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.476250648498535, |
|
"learning_rate": 0.004704128440366973, |
|
"loss": 1.1724, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.405545711517334, |
|
"learning_rate": 0.004681192660550459, |
|
"loss": 1.1089, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.545285940170288, |
|
"learning_rate": 0.0046582568807339445, |
|
"loss": 0.9702, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.000765085220337, |
|
"learning_rate": 0.004635321100917431, |
|
"loss": 1.0029, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.3316946029663086, |
|
"learning_rate": 0.004612385321100918, |
|
"loss": 1.003, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.2348740100860596, |
|
"learning_rate": 0.004589449541284404, |
|
"loss": 1.0428, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.1600568294525146, |
|
"learning_rate": 0.00456651376146789, |
|
"loss": 1.0674, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.6075286865234375, |
|
"learning_rate": 0.0045435779816513765, |
|
"loss": 1.0286, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.731745719909668, |
|
"learning_rate": 0.004520642201834862, |
|
"loss": 0.9608, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7765089722675367, |
|
"eval_f1": 0.7007285514340752, |
|
"eval_loss": 0.6055155992507935, |
|
"eval_precision": 0.7235333775207775, |
|
"eval_recall": 0.7232886628310071, |
|
"eval_runtime": 20.7254, |
|
"eval_samples_per_second": 118.309, |
|
"eval_steps_per_second": 7.43, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 2.186959981918335, |
|
"learning_rate": 0.004497706422018349, |
|
"loss": 1.0296, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 2.0718557834625244, |
|
"learning_rate": 0.0044747706422018346, |
|
"loss": 0.9602, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.098830461502075, |
|
"learning_rate": 0.004451834862385321, |
|
"loss": 0.9453, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.486630439758301, |
|
"learning_rate": 0.004428899082568808, |
|
"loss": 0.9209, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 2.0856552124023438, |
|
"learning_rate": 0.0044059633027522934, |
|
"loss": 1.0935, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 1.6093757152557373, |
|
"learning_rate": 0.00438302752293578, |
|
"loss": 1.0387, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.9845185279846191, |
|
"learning_rate": 0.004360091743119266, |
|
"loss": 0.9536, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 2.10725474357605, |
|
"learning_rate": 0.004337155963302752, |
|
"loss": 0.8167, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 2.1504321098327637, |
|
"learning_rate": 0.004314220183486239, |
|
"loss": 0.9638, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.8799842596054077, |
|
"learning_rate": 0.004291284403669725, |
|
"loss": 0.9503, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 2.466273069381714, |
|
"learning_rate": 0.004268348623853211, |
|
"loss": 0.9487, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.635208249092102, |
|
"learning_rate": 0.004245412844036698, |
|
"loss": 0.9618, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.8513455390930176, |
|
"learning_rate": 0.0042224770642201835, |
|
"loss": 0.9647, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 2.562163829803467, |
|
"learning_rate": 0.004199541284403669, |
|
"loss": 0.9159, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 3.5535786151885986, |
|
"learning_rate": 0.004176605504587156, |
|
"loss": 1.109, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.6485819816589355, |
|
"learning_rate": 0.004153669724770642, |
|
"loss": 1.0044, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 2.7095954418182373, |
|
"learning_rate": 0.004130733944954129, |
|
"loss": 0.9123, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.9968432188034058, |
|
"learning_rate": 0.004107798165137615, |
|
"loss": 0.9469, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.2975969314575195, |
|
"learning_rate": 0.004084862385321101, |
|
"loss": 0.9568, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 3.4922454357147217, |
|
"learning_rate": 0.004061926605504587, |
|
"loss": 0.9636, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.8210103511810303, |
|
"learning_rate": 0.004038990825688074, |
|
"loss": 0.9296, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 2.032224655151367, |
|
"learning_rate": 0.004016055045871559, |
|
"loss": 0.9984, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8066884176182708, |
|
"eval_f1": 0.7113876383080939, |
|
"eval_loss": 0.4811672270298004, |
|
"eval_precision": 0.7265273055775476, |
|
"eval_recall": 0.7320681182395002, |
|
"eval_runtime": 20.7162, |
|
"eval_samples_per_second": 118.362, |
|
"eval_steps_per_second": 7.434, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 1.7719513177871704, |
|
"learning_rate": 0.003995412844036697, |
|
"loss": 0.9865, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 2.115173578262329, |
|
"learning_rate": 0.003972477064220183, |
|
"loss": 0.9227, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 1.707400918006897, |
|
"learning_rate": 0.00394954128440367, |
|
"loss": 0.9195, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 2.7709805965423584, |
|
"learning_rate": 0.0039266055045871565, |
|
"loss": 0.8792, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 2.1106033325195312, |
|
"learning_rate": 0.003903669724770642, |
|
"loss": 0.9282, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 2.2281219959259033, |
|
"learning_rate": 0.0038807339449541283, |
|
"loss": 0.8242, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 1.9189764261245728, |
|
"learning_rate": 0.003857798165137615, |
|
"loss": 0.8428, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 2.04294753074646, |
|
"learning_rate": 0.003834862385321101, |
|
"loss": 0.8726, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 2.139575481414795, |
|
"learning_rate": 0.003811926605504587, |
|
"loss": 0.9184, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 2.527928113937378, |
|
"learning_rate": 0.0037889908256880734, |
|
"loss": 0.934, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 1.9021204710006714, |
|
"learning_rate": 0.00376605504587156, |
|
"loss": 0.9117, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 2.099567174911499, |
|
"learning_rate": 0.003743119266055046, |
|
"loss": 0.7403, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 1.6857088804244995, |
|
"learning_rate": 0.003720183486238532, |
|
"loss": 0.8173, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 2.216789960861206, |
|
"learning_rate": 0.0036972477064220184, |
|
"loss": 0.9077, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 1.2876479625701904, |
|
"learning_rate": 0.0036743119266055045, |
|
"loss": 0.8218, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 1.7834821939468384, |
|
"learning_rate": 0.003651376146788991, |
|
"loss": 0.8954, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 2.3635175228118896, |
|
"learning_rate": 0.003628440366972477, |
|
"loss": 0.8818, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 1.9978779554367065, |
|
"learning_rate": 0.0036055045871559634, |
|
"loss": 0.9062, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 2.9038476943969727, |
|
"learning_rate": 0.0035825688073394496, |
|
"loss": 0.8452, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 1.843595027923584, |
|
"learning_rate": 0.003559633027522936, |
|
"loss": 0.8291, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 2.618366241455078, |
|
"learning_rate": 0.003536697247706422, |
|
"loss": 0.8661, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.8442063331604004, |
|
"learning_rate": 0.003513761467889908, |
|
"loss": 0.8265, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.851957585644372, |
|
"eval_f1": 0.768301171782241, |
|
"eval_loss": 0.3725639581680298, |
|
"eval_precision": 0.800477076178721, |
|
"eval_recall": 0.771278337679668, |
|
"eval_runtime": 20.7103, |
|
"eval_samples_per_second": 118.395, |
|
"eval_steps_per_second": 7.436, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 2.050394058227539, |
|
"learning_rate": 0.0034908256880733946, |
|
"loss": 0.7888, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 2.184720993041992, |
|
"learning_rate": 0.003467889908256881, |
|
"loss": 0.8631, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 1.606249213218689, |
|
"learning_rate": 0.003444954128440367, |
|
"loss": 0.7822, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 2.3371753692626953, |
|
"learning_rate": 0.003422018348623853, |
|
"loss": 0.7497, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 2.1213464736938477, |
|
"learning_rate": 0.0033990825688073397, |
|
"loss": 0.8359, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"grad_norm": 2.4342410564422607, |
|
"learning_rate": 0.003376146788990826, |
|
"loss": 0.8393, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 2.252201557159424, |
|
"learning_rate": 0.003353211009174312, |
|
"loss": 0.771, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 2.0991744995117188, |
|
"learning_rate": 0.003330275229357798, |
|
"loss": 0.8322, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"grad_norm": 1.4995887279510498, |
|
"learning_rate": 0.0033073394495412847, |
|
"loss": 0.8652, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 1.8054556846618652, |
|
"learning_rate": 0.003284403669724771, |
|
"loss": 0.8594, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"grad_norm": 1.846091389656067, |
|
"learning_rate": 0.0032614678899082566, |
|
"loss": 0.8507, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"grad_norm": 2.486276149749756, |
|
"learning_rate": 0.003238532110091743, |
|
"loss": 0.8608, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"grad_norm": 1.9299883842468262, |
|
"learning_rate": 0.0032155963302752293, |
|
"loss": 0.7887, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"grad_norm": 1.7495784759521484, |
|
"learning_rate": 0.003192660550458716, |
|
"loss": 0.8364, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"grad_norm": 1.4023948907852173, |
|
"learning_rate": 0.0031697247706422016, |
|
"loss": 0.8239, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"grad_norm": 2.4405996799468994, |
|
"learning_rate": 0.003146788990825688, |
|
"loss": 0.7324, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 2.8213984966278076, |
|
"learning_rate": 0.0031238532110091743, |
|
"loss": 0.8305, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"grad_norm": 1.9884538650512695, |
|
"learning_rate": 0.003100917431192661, |
|
"loss": 0.823, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 1.3576688766479492, |
|
"learning_rate": 0.0030779816513761466, |
|
"loss": 0.7203, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 1.8440253734588623, |
|
"learning_rate": 0.0030550458715596328, |
|
"loss": 0.7564, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"grad_norm": 1.383427381515503, |
|
"learning_rate": 0.0030321100917431194, |
|
"loss": 0.7679, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"grad_norm": 2.3057126998901367, |
|
"learning_rate": 0.003009174311926606, |
|
"loss": 0.7938, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8507340946166395, |
|
"eval_f1": 0.7554407269192587, |
|
"eval_loss": 0.39133650064468384, |
|
"eval_precision": 0.7811773459124496, |
|
"eval_recall": 0.7830608562256607, |
|
"eval_runtime": 20.8094, |
|
"eval_samples_per_second": 117.831, |
|
"eval_steps_per_second": 7.4, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 1.3315050601959229, |
|
"learning_rate": 0.0029862385321100917, |
|
"loss": 0.7701, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 2.4050934314727783, |
|
"learning_rate": 0.002963302752293578, |
|
"loss": 0.7967, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"grad_norm": 1.3627427816390991, |
|
"learning_rate": 0.0029403669724770644, |
|
"loss": 0.8184, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"grad_norm": 2.152087688446045, |
|
"learning_rate": 0.0029174311926605506, |
|
"loss": 0.7987, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"grad_norm": 2.42413067817688, |
|
"learning_rate": 0.0028944954128440367, |
|
"loss": 0.7772, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"grad_norm": 1.8017699718475342, |
|
"learning_rate": 0.002871559633027523, |
|
"loss": 0.7587, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"grad_norm": 1.805237889289856, |
|
"learning_rate": 0.0028486238532110094, |
|
"loss": 0.7326, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 2.2268106937408447, |
|
"learning_rate": 0.0028256880733944956, |
|
"loss": 0.7293, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 2.386366605758667, |
|
"learning_rate": 0.0028027522935779813, |
|
"loss": 0.7396, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 2.0315840244293213, |
|
"learning_rate": 0.002779816513761468, |
|
"loss": 0.7131, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 2.1899399757385254, |
|
"learning_rate": 0.002756880733944954, |
|
"loss": 0.8073, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"grad_norm": 1.614035725593567, |
|
"learning_rate": 0.0027339449541284406, |
|
"loss": 0.7168, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"grad_norm": 2.4006643295288086, |
|
"learning_rate": 0.0027110091743119263, |
|
"loss": 0.7774, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"grad_norm": 2.3257529735565186, |
|
"learning_rate": 0.002688073394495413, |
|
"loss": 0.7808, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 1.6664689779281616, |
|
"learning_rate": 0.002665137614678899, |
|
"loss": 0.717, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 1.7779196500778198, |
|
"learning_rate": 0.0026422018348623857, |
|
"loss": 0.7697, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"grad_norm": 1.8458689451217651, |
|
"learning_rate": 0.0026192660550458714, |
|
"loss": 0.7384, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"grad_norm": 1.5530970096588135, |
|
"learning_rate": 0.0025963302752293575, |
|
"loss": 0.7411, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 2.550743341445923, |
|
"learning_rate": 0.002573394495412844, |
|
"loss": 0.6985, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"grad_norm": 1.284894347190857, |
|
"learning_rate": 0.0025504587155963303, |
|
"loss": 0.7961, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 1.5528652667999268, |
|
"learning_rate": 0.0025275229357798164, |
|
"loss": 0.8198, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.2907180786132812, |
|
"learning_rate": 0.0025045871559633026, |
|
"loss": 0.8149, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8531810766721044, |
|
"eval_f1": 0.7701622667658707, |
|
"eval_loss": 0.3675837218761444, |
|
"eval_precision": 0.7686624499292936, |
|
"eval_recall": 0.8001667562361665, |
|
"eval_runtime": 20.7465, |
|
"eval_samples_per_second": 118.189, |
|
"eval_steps_per_second": 7.423, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 1.3846713304519653, |
|
"learning_rate": 0.002481651376146789, |
|
"loss": 0.6938, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 1.6576237678527832, |
|
"learning_rate": 0.0024587155963302753, |
|
"loss": 0.7169, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"grad_norm": 1.8432438373565674, |
|
"learning_rate": 0.0024357798165137614, |
|
"loss": 0.7242, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 1.9799226522445679, |
|
"learning_rate": 0.002412844036697248, |
|
"loss": 0.6667, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"grad_norm": 1.92507004737854, |
|
"learning_rate": 0.002389908256880734, |
|
"loss": 0.6681, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"grad_norm": 1.4437962770462036, |
|
"learning_rate": 0.0023669724770642203, |
|
"loss": 0.6618, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"grad_norm": 1.565990924835205, |
|
"learning_rate": 0.0023440366972477065, |
|
"loss": 0.7162, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"grad_norm": 1.7609509229660034, |
|
"learning_rate": 0.0023211009174311926, |
|
"loss": 0.6953, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"grad_norm": 1.402076005935669, |
|
"learning_rate": 0.002298165137614679, |
|
"loss": 0.7114, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"grad_norm": 1.542944312095642, |
|
"learning_rate": 0.0022752293577981654, |
|
"loss": 0.7695, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"grad_norm": 1.3250664472579956, |
|
"learning_rate": 0.0022522935779816515, |
|
"loss": 0.6984, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"grad_norm": 1.790968894958496, |
|
"learning_rate": 0.0022293577981651377, |
|
"loss": 0.7457, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 1.8708348274230957, |
|
"learning_rate": 0.002206422018348624, |
|
"loss": 0.6854, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"grad_norm": 1.3669894933700562, |
|
"learning_rate": 0.0021834862385321104, |
|
"loss": 0.7601, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"grad_norm": 1.6164820194244385, |
|
"learning_rate": 0.0021605504587155966, |
|
"loss": 0.7015, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"grad_norm": 2.5344796180725098, |
|
"learning_rate": 0.0021376146788990827, |
|
"loss": 0.6816, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"grad_norm": 1.8224860429763794, |
|
"learning_rate": 0.002114678899082569, |
|
"loss": 0.7091, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"grad_norm": 2.3349053859710693, |
|
"learning_rate": 0.002091743119266055, |
|
"loss": 0.69, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"grad_norm": 1.742497444152832, |
|
"learning_rate": 0.002068807339449541, |
|
"loss": 0.6835, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"grad_norm": 1.6536716222763062, |
|
"learning_rate": 0.0020458715596330277, |
|
"loss": 0.6827, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 1.6461354494094849, |
|
"learning_rate": 0.002022935779816514, |
|
"loss": 0.6737, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8674551386623165, |
|
"eval_f1": 0.7933729473887552, |
|
"eval_loss": 0.3304790258407593, |
|
"eval_precision": 0.8306122822846135, |
|
"eval_recall": 0.8116698887283952, |
|
"eval_runtime": 20.7895, |
|
"eval_samples_per_second": 117.944, |
|
"eval_steps_per_second": 7.408, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 1.2140992879867554, |
|
"learning_rate": 0.002, |
|
"loss": 0.6921, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 2.1813745498657227, |
|
"learning_rate": 0.001977064220183486, |
|
"loss": 0.7078, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 1.2994489669799805, |
|
"learning_rate": 0.0019541284403669728, |
|
"loss": 0.6949, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"grad_norm": 1.4426566362380981, |
|
"learning_rate": 0.0019311926605504587, |
|
"loss": 0.6685, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"grad_norm": 1.5542079210281372, |
|
"learning_rate": 0.001908256880733945, |
|
"loss": 0.6823, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"grad_norm": 1.5096594095230103, |
|
"learning_rate": 0.0018853211009174312, |
|
"loss": 0.6203, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"grad_norm": 2.132112979888916, |
|
"learning_rate": 0.0018623853211009176, |
|
"loss": 0.6942, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"grad_norm": 2.1632845401763916, |
|
"learning_rate": 0.0018394495412844037, |
|
"loss": 0.5899, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"grad_norm": 1.6782176494598389, |
|
"learning_rate": 0.0018165137614678901, |
|
"loss": 0.6316, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"grad_norm": 1.1974135637283325, |
|
"learning_rate": 0.001793577981651376, |
|
"loss": 0.609, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"grad_norm": 1.8140075206756592, |
|
"learning_rate": 0.0017706422018348624, |
|
"loss": 0.65, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"grad_norm": 1.5135620832443237, |
|
"learning_rate": 0.0017477064220183486, |
|
"loss": 0.6201, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"grad_norm": 1.6838016510009766, |
|
"learning_rate": 0.001724770642201835, |
|
"loss": 0.6044, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"grad_norm": 1.55550217628479, |
|
"learning_rate": 0.001701834862385321, |
|
"loss": 0.6753, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"grad_norm": 1.8122529983520508, |
|
"learning_rate": 0.0016788990825688074, |
|
"loss": 0.6608, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"grad_norm": 2.207045555114746, |
|
"learning_rate": 0.0016559633027522936, |
|
"loss": 0.6132, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"grad_norm": 0.9892495274543762, |
|
"learning_rate": 0.00163302752293578, |
|
"loss": 0.6177, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"grad_norm": 2.4809529781341553, |
|
"learning_rate": 0.0016100917431192661, |
|
"loss": 0.5607, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"grad_norm": 1.430293083190918, |
|
"learning_rate": 0.0015871559633027525, |
|
"loss": 0.6822, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"grad_norm": 1.6523278951644897, |
|
"learning_rate": 0.0015642201834862384, |
|
"loss": 0.6486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"grad_norm": 1.5746197700500488, |
|
"learning_rate": 0.0015412844036697248, |
|
"loss": 0.5902, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"grad_norm": 1.5833261013031006, |
|
"learning_rate": 0.001518348623853211, |
|
"loss": 0.5695, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9029363784665579, |
|
"eval_f1": 0.8321123007962286, |
|
"eval_loss": 0.24805299937725067, |
|
"eval_precision": 0.8545669814728453, |
|
"eval_recall": 0.8469225407396054, |
|
"eval_runtime": 20.7317, |
|
"eval_samples_per_second": 118.273, |
|
"eval_steps_per_second": 7.428, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 1.8044548034667969, |
|
"learning_rate": 0.0014954128440366973, |
|
"loss": 0.5888, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 1.9052773714065552, |
|
"learning_rate": 0.0014724770642201835, |
|
"loss": 0.5968, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"grad_norm": 1.835716962814331, |
|
"learning_rate": 0.0014495412844036698, |
|
"loss": 0.642, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"grad_norm": 1.3683079481124878, |
|
"learning_rate": 0.001426605504587156, |
|
"loss": 0.6627, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 1.9506031274795532, |
|
"learning_rate": 0.0014036697247706423, |
|
"loss": 0.6054, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"grad_norm": 1.300554871559143, |
|
"learning_rate": 0.0013807339449541285, |
|
"loss": 0.6375, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"grad_norm": 2.0933167934417725, |
|
"learning_rate": 0.0013577981651376149, |
|
"loss": 0.5603, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"grad_norm": 1.0951093435287476, |
|
"learning_rate": 0.0013348623853211008, |
|
"loss": 0.5836, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"grad_norm": 1.186511516571045, |
|
"learning_rate": 0.0013119266055045872, |
|
"loss": 0.5877, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"grad_norm": 1.5988391637802124, |
|
"learning_rate": 0.0012889908256880733, |
|
"loss": 0.5868, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"grad_norm": 2.0407028198242188, |
|
"learning_rate": 0.0012660550458715597, |
|
"loss": 0.564, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"grad_norm": 1.4676613807678223, |
|
"learning_rate": 0.001243119266055046, |
|
"loss": 0.6205, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"grad_norm": 1.1794471740722656, |
|
"learning_rate": 0.0012201834862385322, |
|
"loss": 0.581, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"grad_norm": 1.7204222679138184, |
|
"learning_rate": 0.0011972477064220183, |
|
"loss": 0.6433, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"grad_norm": 1.7347933053970337, |
|
"learning_rate": 0.0011743119266055047, |
|
"loss": 0.6132, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"grad_norm": 1.0963069200515747, |
|
"learning_rate": 0.0011513761467889909, |
|
"loss": 0.5961, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"grad_norm": 1.0383723974227905, |
|
"learning_rate": 0.0011284403669724772, |
|
"loss": 0.5934, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"grad_norm": 1.314132809638977, |
|
"learning_rate": 0.0011055045871559634, |
|
"loss": 0.5667, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 1.8448032140731812, |
|
"learning_rate": 0.0010825688073394495, |
|
"loss": 0.5999, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"grad_norm": 1.187971591949463, |
|
"learning_rate": 0.001059633027522936, |
|
"loss": 0.5668, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"grad_norm": 1.451027512550354, |
|
"learning_rate": 0.001036697247706422, |
|
"loss": 0.5928, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"grad_norm": 1.5274406671524048, |
|
"learning_rate": 0.0010137614678899084, |
|
"loss": 0.5857, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8923327895595432, |
|
"eval_f1": 0.8339907958679574, |
|
"eval_loss": 0.2911706566810608, |
|
"eval_precision": 0.8463533280759077, |
|
"eval_recall": 0.8356417778568457, |
|
"eval_runtime": 20.8398, |
|
"eval_samples_per_second": 117.66, |
|
"eval_steps_per_second": 7.39, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 1.254504919052124, |
|
"learning_rate": 0.0009908256880733946, |
|
"loss": 0.5059, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 1.3762636184692383, |
|
"learning_rate": 0.0009678899082568808, |
|
"loss": 0.578, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"grad_norm": 1.472004771232605, |
|
"learning_rate": 0.0009449541284403671, |
|
"loss": 0.5216, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"grad_norm": 1.5876396894454956, |
|
"learning_rate": 0.0009220183486238532, |
|
"loss": 0.5689, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"grad_norm": 1.2409037351608276, |
|
"learning_rate": 0.0008990825688073395, |
|
"loss": 0.5571, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"grad_norm": 1.2082581520080566, |
|
"learning_rate": 0.0008761467889908258, |
|
"loss": 0.5809, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"grad_norm": 1.7171647548675537, |
|
"learning_rate": 0.000853211009174312, |
|
"loss": 0.5626, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"grad_norm": 1.3834128379821777, |
|
"learning_rate": 0.0008302752293577983, |
|
"loss": 0.5362, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"grad_norm": 1.424574375152588, |
|
"learning_rate": 0.0008073394495412844, |
|
"loss": 0.5418, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"grad_norm": 1.5661699771881104, |
|
"learning_rate": 0.0007844036697247707, |
|
"loss": 0.5609, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"grad_norm": 1.9551769495010376, |
|
"learning_rate": 0.0007614678899082569, |
|
"loss": 0.564, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"grad_norm": 1.084270715713501, |
|
"learning_rate": 0.0007385321100917432, |
|
"loss": 0.5121, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"grad_norm": 1.446973204612732, |
|
"learning_rate": 0.0007155963302752295, |
|
"loss": 0.569, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"grad_norm": 1.3539941310882568, |
|
"learning_rate": 0.0006926605504587156, |
|
"loss": 0.525, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"grad_norm": 1.413223147392273, |
|
"learning_rate": 0.0006697247706422019, |
|
"loss": 0.4403, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"grad_norm": 1.3614115715026855, |
|
"learning_rate": 0.0006467889908256881, |
|
"loss": 0.553, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"grad_norm": 1.2185453176498413, |
|
"learning_rate": 0.0006238532110091744, |
|
"loss": 0.5498, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"grad_norm": 1.3884397745132446, |
|
"learning_rate": 0.0006009174311926606, |
|
"loss": 0.5092, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"grad_norm": 1.2725589275360107, |
|
"learning_rate": 0.0005779816513761468, |
|
"loss": 0.5695, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"grad_norm": 1.2434947490692139, |
|
"learning_rate": 0.000555045871559633, |
|
"loss": 0.4998, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"grad_norm": 1.378146767616272, |
|
"learning_rate": 0.0005321100917431193, |
|
"loss": 0.5091, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"grad_norm": 1.3200100660324097, |
|
"learning_rate": 0.0005091743119266056, |
|
"loss": 0.4834, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.899673735725938, |
|
"eval_f1": 0.8285783762787183, |
|
"eval_loss": 0.2657569944858551, |
|
"eval_precision": 0.8428024691807685, |
|
"eval_recall": 0.8409794358062922, |
|
"eval_runtime": 20.7116, |
|
"eval_samples_per_second": 118.388, |
|
"eval_steps_per_second": 7.435, |
|
"step": 1962 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 1.1076021194458008, |
|
"learning_rate": 0.0004862385321100918, |
|
"loss": 0.5164, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 1.50495445728302, |
|
"learning_rate": 0.00046330275229357804, |
|
"loss": 0.4388, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"grad_norm": 1.4007256031036377, |
|
"learning_rate": 0.00044036697247706424, |
|
"loss": 0.4761, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"grad_norm": 1.226333737373352, |
|
"learning_rate": 0.0004174311926605505, |
|
"loss": 0.4861, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": 1.937953233718872, |
|
"learning_rate": 0.0003944954128440367, |
|
"loss": 0.4604, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"grad_norm": 1.35695219039917, |
|
"learning_rate": 0.00037155963302752296, |
|
"loss": 0.4633, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"grad_norm": 1.521087884902954, |
|
"learning_rate": 0.0003486238532110092, |
|
"loss": 0.4512, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"grad_norm": 1.4338667392730713, |
|
"learning_rate": 0.0003256880733944954, |
|
"loss": 0.5439, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"grad_norm": 1.7897557020187378, |
|
"learning_rate": 0.0003027522935779817, |
|
"loss": 0.5278, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"grad_norm": 1.0933929681777954, |
|
"learning_rate": 0.0002798165137614679, |
|
"loss": 0.4592, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 1.2754806280136108, |
|
"learning_rate": 0.00025688073394495415, |
|
"loss": 0.476, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"grad_norm": 1.724406361579895, |
|
"learning_rate": 0.00023394495412844038, |
|
"loss": 0.4963, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"grad_norm": 1.5604366064071655, |
|
"learning_rate": 0.0002110091743119266, |
|
"loss": 0.4926, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"grad_norm": 1.2408415079116821, |
|
"learning_rate": 0.00018807339449541287, |
|
"loss": 0.4954, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 0.8950926065444946, |
|
"learning_rate": 0.0001651376146788991, |
|
"loss": 0.4444, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"grad_norm": 1.2199152708053589, |
|
"learning_rate": 0.00014220183486238534, |
|
"loss": 0.4606, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"grad_norm": 1.1516410112380981, |
|
"learning_rate": 0.00011926605504587157, |
|
"loss": 0.4691, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 2.192310333251953, |
|
"learning_rate": 9.63302752293578e-05, |
|
"loss": 0.5306, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"grad_norm": 1.2649255990982056, |
|
"learning_rate": 7.339449541284404e-05, |
|
"loss": 0.5238, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"grad_norm": 1.5681147575378418, |
|
"learning_rate": 5.0458715596330276e-05, |
|
"loss": 0.4631, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"grad_norm": 1.3230477571487427, |
|
"learning_rate": 2.7522935779816515e-05, |
|
"loss": 0.4716, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.0949994325637817, |
|
"learning_rate": 4.587155963302753e-06, |
|
"loss": 0.5287, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9049755301794453, |
|
"eval_f1": 0.8468363122524573, |
|
"eval_loss": 0.2590215504169464, |
|
"eval_precision": 0.8523921546408058, |
|
"eval_recall": 0.8468399455092634, |
|
"eval_runtime": 20.6801, |
|
"eval_samples_per_second": 118.568, |
|
"eval_steps_per_second": 7.447, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2180, |
|
"total_flos": 1.087396245164114e+19, |
|
"train_loss": 0.7537981033325195, |
|
"train_runtime": 2484.6922, |
|
"train_samples_per_second": 56.104, |
|
"train_steps_per_second": 0.877 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8239891267414203, |
|
"eval_f1": 0.7852027158639473, |
|
"eval_loss": 0.4609430432319641, |
|
"eval_precision": 0.7894503286617733, |
|
"eval_recall": 0.7820860597039324, |
|
"eval_runtime": 74.5465, |
|
"eval_samples_per_second": 118.436, |
|
"eval_steps_per_second": 7.405, |
|
"step": 2180 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2180, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.087396245164114e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|