|
{ |
|
"best_metric": 0.7726819541375872, |
|
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-1090", |
|
"epoch": 9.954337899543379, |
|
"eval_steps": 500, |
|
"global_step": 1090, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.391805171966553, |
|
"learning_rate": 0.004954128440366973, |
|
"loss": 1.2554, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.9513263702392578, |
|
"learning_rate": 0.004908256880733945, |
|
"loss": 1.0074, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.8831961154937744, |
|
"learning_rate": 0.004862385321100918, |
|
"loss": 0.9068, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.7356910705566406, |
|
"learning_rate": 0.00481651376146789, |
|
"loss": 0.9577, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.5790963172912598, |
|
"learning_rate": 0.0047706422018348625, |
|
"loss": 0.9124, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.810718297958374, |
|
"learning_rate": 0.004724770642201835, |
|
"loss": 0.8901, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.3134875297546387, |
|
"learning_rate": 0.004678899082568808, |
|
"loss": 0.9111, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.8082305192947388, |
|
"learning_rate": 0.00463302752293578, |
|
"loss": 0.9059, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.4370285272598267, |
|
"learning_rate": 0.0045871559633027525, |
|
"loss": 0.9984, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.8687102794647217, |
|
"learning_rate": 0.004541284403669725, |
|
"loss": 0.9135, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7198404785643071, |
|
"eval_f1": 0.3049533355905189, |
|
"eval_loss": 0.7698342800140381, |
|
"eval_precision": 0.5179078489450006, |
|
"eval_recall": 0.3103063518754385, |
|
"eval_runtime": 5.826, |
|
"eval_samples_per_second": 172.159, |
|
"eval_steps_per_second": 10.814, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.885165810585022, |
|
"learning_rate": 0.004495412844036698, |
|
"loss": 0.8622, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.4616353511810303, |
|
"learning_rate": 0.0044495412844036695, |
|
"loss": 0.9407, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.2185616493225098, |
|
"learning_rate": 0.004403669724770643, |
|
"loss": 0.8853, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 2.0212063789367676, |
|
"learning_rate": 0.004357798165137615, |
|
"loss": 0.8534, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.129385232925415, |
|
"learning_rate": 0.004311926605504587, |
|
"loss": 0.8613, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.726841926574707, |
|
"learning_rate": 0.0042660550458715595, |
|
"loss": 0.8104, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.272818088531494, |
|
"learning_rate": 0.004220183486238533, |
|
"loss": 0.915, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.117639422416687, |
|
"learning_rate": 0.004174311926605505, |
|
"loss": 0.8753, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.8981102108955383, |
|
"learning_rate": 0.004128440366972477, |
|
"loss": 0.8429, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.282863974571228, |
|
"learning_rate": 0.00408256880733945, |
|
"loss": 0.8355, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.9718325734138489, |
|
"learning_rate": 0.004036697247706422, |
|
"loss": 0.8352, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7298105682951147, |
|
"eval_f1": 0.3883733866690214, |
|
"eval_loss": 0.7352049350738525, |
|
"eval_precision": 0.536225724752637, |
|
"eval_recall": 0.42308831430611293, |
|
"eval_runtime": 5.7891, |
|
"eval_samples_per_second": 173.258, |
|
"eval_steps_per_second": 10.883, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 1.3424675464630127, |
|
"learning_rate": 0.003990825688073394, |
|
"loss": 0.8943, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 1.352690577507019, |
|
"learning_rate": 0.003944954128440367, |
|
"loss": 0.8426, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 1.743643879890442, |
|
"learning_rate": 0.0038990825688073397, |
|
"loss": 0.7838, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.9074971675872803, |
|
"learning_rate": 0.0038532110091743124, |
|
"loss": 0.8546, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.6970580816268921, |
|
"learning_rate": 0.0038073394495412843, |
|
"loss": 0.8481, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 1.3498753309249878, |
|
"learning_rate": 0.003761467889908257, |
|
"loss": 0.766, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 1.2216682434082031, |
|
"learning_rate": 0.0037155963302752293, |
|
"loss": 0.8654, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 1.1758558750152588, |
|
"learning_rate": 0.003669724770642202, |
|
"loss": 0.8438, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.8012980222702026, |
|
"learning_rate": 0.0036238532110091743, |
|
"loss": 0.8363, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.5546942949295044, |
|
"learning_rate": 0.003577981651376147, |
|
"loss": 0.8998, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.9878047704696655, |
|
"learning_rate": 0.0035321100917431194, |
|
"loss": 0.7891, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7178464606181456, |
|
"eval_f1": 0.36668660138047887, |
|
"eval_loss": 0.7574967741966248, |
|
"eval_precision": 0.3953602798961963, |
|
"eval_recall": 0.40001083689608274, |
|
"eval_runtime": 5.9834, |
|
"eval_samples_per_second": 167.63, |
|
"eval_steps_per_second": 10.529, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 1.59013032913208, |
|
"learning_rate": 0.003486238532110092, |
|
"loss": 0.8096, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 0.7891358733177185, |
|
"learning_rate": 0.0034403669724770644, |
|
"loss": 0.8351, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 1.1285407543182373, |
|
"learning_rate": 0.003394495412844037, |
|
"loss": 0.8189, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 1.166321873664856, |
|
"learning_rate": 0.003348623853211009, |
|
"loss": 0.7379, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 0.8681693077087402, |
|
"learning_rate": 0.0033027522935779817, |
|
"loss": 0.7388, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 1.2175371646881104, |
|
"learning_rate": 0.003256880733944954, |
|
"loss": 0.8183, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 1.4015443325042725, |
|
"learning_rate": 0.003211009174311927, |
|
"loss": 0.8545, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 1.3555314540863037, |
|
"learning_rate": 0.003165137614678899, |
|
"loss": 0.8038, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 1.3774343729019165, |
|
"learning_rate": 0.003119266055045872, |
|
"loss": 0.8346, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 1.0004535913467407, |
|
"learning_rate": 0.003073394495412844, |
|
"loss": 0.7919, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 0.7557776570320129, |
|
"learning_rate": 0.003027522935779817, |
|
"loss": 0.7649, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7417746759720838, |
|
"eval_f1": 0.41456723287460584, |
|
"eval_loss": 0.6878895163536072, |
|
"eval_precision": 0.5009001421261136, |
|
"eval_recall": 0.3971855759911965, |
|
"eval_runtime": 5.8982, |
|
"eval_samples_per_second": 170.052, |
|
"eval_steps_per_second": 10.681, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.8360877633094788, |
|
"learning_rate": 0.002981651376146789, |
|
"loss": 0.7322, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 1.399274468421936, |
|
"learning_rate": 0.002935779816513762, |
|
"loss": 0.7425, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 1.0945734977722168, |
|
"learning_rate": 0.0028899082568807338, |
|
"loss": 0.7295, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 1.5122932195663452, |
|
"learning_rate": 0.0028440366972477065, |
|
"loss": 0.7892, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 0.7687821388244629, |
|
"learning_rate": 0.002798165137614679, |
|
"loss": 0.7614, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 1.25822913646698, |
|
"learning_rate": 0.0027522935779816515, |
|
"loss": 0.7811, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.7886181473731995, |
|
"learning_rate": 0.002706422018348624, |
|
"loss": 0.8093, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.7840601801872253, |
|
"learning_rate": 0.0026605504587155966, |
|
"loss": 0.738, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 1.1541500091552734, |
|
"learning_rate": 0.002614678899082569, |
|
"loss": 0.8208, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 1.354428768157959, |
|
"learning_rate": 0.0025688073394495416, |
|
"loss": 0.746, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 0.8335555195808411, |
|
"learning_rate": 0.0025229357798165135, |
|
"loss": 0.8146, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7178464606181456, |
|
"eval_f1": 0.36409764256305227, |
|
"eval_loss": 0.7471081614494324, |
|
"eval_precision": 0.4490225302647663, |
|
"eval_recall": 0.4141043546195771, |
|
"eval_runtime": 5.7321, |
|
"eval_samples_per_second": 174.978, |
|
"eval_steps_per_second": 10.991, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 1.1929486989974976, |
|
"learning_rate": 0.0024770642201834866, |
|
"loss": 0.7308, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"grad_norm": 1.202407717704773, |
|
"learning_rate": 0.002431192660550459, |
|
"loss": 0.7956, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"grad_norm": 1.1130154132843018, |
|
"learning_rate": 0.0023853211009174312, |
|
"loss": 0.7042, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 1.1920500993728638, |
|
"learning_rate": 0.002339449541284404, |
|
"loss": 0.7945, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"grad_norm": 1.4452427625656128, |
|
"learning_rate": 0.0022935779816513763, |
|
"loss": 0.7757, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"grad_norm": 1.1632148027420044, |
|
"learning_rate": 0.002247706422018349, |
|
"loss": 0.7595, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"grad_norm": 0.8493008017539978, |
|
"learning_rate": 0.0022018348623853213, |
|
"loss": 0.7757, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"grad_norm": 1.3676623106002808, |
|
"learning_rate": 0.0021559633027522936, |
|
"loss": 0.7664, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 0.9162562489509583, |
|
"learning_rate": 0.0021100917431192663, |
|
"loss": 0.7303, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 0.7179450392723083, |
|
"learning_rate": 0.0020642201834862386, |
|
"loss": 0.7088, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"grad_norm": 0.7027204632759094, |
|
"learning_rate": 0.002018348623853211, |
|
"loss": 0.6831, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7367896311066799, |
|
"eval_f1": 0.42520915568023343, |
|
"eval_loss": 0.7007002830505371, |
|
"eval_precision": 0.47770601644698146, |
|
"eval_recall": 0.4148257042870626, |
|
"eval_runtime": 5.9496, |
|
"eval_samples_per_second": 168.582, |
|
"eval_steps_per_second": 10.589, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.7743313312530518, |
|
"learning_rate": 0.0019724770642201837, |
|
"loss": 0.7261, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.6318005323410034, |
|
"learning_rate": 0.0019266055045871562, |
|
"loss": 0.702, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 0.696121096611023, |
|
"learning_rate": 0.0018807339449541285, |
|
"loss": 0.6966, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"grad_norm": 0.5956413149833679, |
|
"learning_rate": 0.001834862385321101, |
|
"loss": 0.7176, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"grad_norm": 1.6218867301940918, |
|
"learning_rate": 0.0017889908256880735, |
|
"loss": 0.7223, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 1.1353025436401367, |
|
"learning_rate": 0.001743119266055046, |
|
"loss": 0.7098, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"grad_norm": 0.7476430535316467, |
|
"learning_rate": 0.0016972477064220186, |
|
"loss": 0.6411, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 1.3945239782333374, |
|
"learning_rate": 0.0016513761467889909, |
|
"loss": 0.7219, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"grad_norm": 0.9898785948753357, |
|
"learning_rate": 0.0016055045871559634, |
|
"loss": 0.7456, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"grad_norm": 0.7352449893951416, |
|
"learning_rate": 0.001559633027522936, |
|
"loss": 0.7057, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"grad_norm": 1.0120513439178467, |
|
"learning_rate": 0.0015137614678899084, |
|
"loss": 0.695, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7427716849451645, |
|
"eval_f1": 0.48411655206463156, |
|
"eval_loss": 0.6797036528587341, |
|
"eval_precision": 0.4638223826629623, |
|
"eval_recall": 0.5333725550353186, |
|
"eval_runtime": 5.7522, |
|
"eval_samples_per_second": 174.367, |
|
"eval_steps_per_second": 10.952, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 1.297319769859314, |
|
"learning_rate": 0.001467889908256881, |
|
"loss": 0.7541, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 1.1115490198135376, |
|
"learning_rate": 0.0014220183486238532, |
|
"loss": 0.6944, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"grad_norm": 0.881907045841217, |
|
"learning_rate": 0.0013761467889908258, |
|
"loss": 0.7047, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"grad_norm": 0.9110414981842041, |
|
"learning_rate": 0.0013302752293577983, |
|
"loss": 0.7227, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"grad_norm": 0.7190865874290466, |
|
"learning_rate": 0.0012844036697247708, |
|
"loss": 0.7263, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 0.9148305654525757, |
|
"learning_rate": 0.0012385321100917433, |
|
"loss": 0.6336, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"grad_norm": 0.7972878813743591, |
|
"learning_rate": 0.0011926605504587156, |
|
"loss": 0.6886, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"grad_norm": 0.9237717986106873, |
|
"learning_rate": 0.0011467889908256881, |
|
"loss": 0.6212, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 1.1942154169082642, |
|
"learning_rate": 0.0011009174311926607, |
|
"loss": 0.6202, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"grad_norm": 1.2370058298110962, |
|
"learning_rate": 0.0010550458715596332, |
|
"loss": 0.703, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 1.2201330661773682, |
|
"learning_rate": 0.0010091743119266055, |
|
"loss": 0.6646, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7537387836490529, |
|
"eval_f1": 0.4932964122925023, |
|
"eval_loss": 0.6534218192100525, |
|
"eval_precision": 0.6130395728356862, |
|
"eval_recall": 0.5077119285550199, |
|
"eval_runtime": 6.1104, |
|
"eval_samples_per_second": 164.148, |
|
"eval_steps_per_second": 10.31, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.904547393321991, |
|
"learning_rate": 0.0009633027522935781, |
|
"loss": 0.6991, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"grad_norm": 0.7196776270866394, |
|
"learning_rate": 0.0009174311926605505, |
|
"loss": 0.612, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"grad_norm": 1.1908409595489502, |
|
"learning_rate": 0.000871559633027523, |
|
"loss": 0.6583, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 1.1398776769638062, |
|
"learning_rate": 0.0008256880733944954, |
|
"loss": 0.7221, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.867472767829895, |
|
"learning_rate": 0.000779816513761468, |
|
"loss": 0.6012, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"grad_norm": 0.7770695686340332, |
|
"learning_rate": 0.0007339449541284405, |
|
"loss": 0.6763, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"grad_norm": 1.4209235906600952, |
|
"learning_rate": 0.0006880733944954129, |
|
"loss": 0.6451, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"grad_norm": 1.1582767963409424, |
|
"learning_rate": 0.0006422018348623854, |
|
"loss": 0.6745, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"grad_norm": 1.2608932256698608, |
|
"learning_rate": 0.0005963302752293578, |
|
"loss": 0.6464, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"grad_norm": 0.7971704006195068, |
|
"learning_rate": 0.0005504587155963303, |
|
"loss": 0.6251, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"grad_norm": 1.318163275718689, |
|
"learning_rate": 0.0005045871559633027, |
|
"loss": 0.675, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7666999002991027, |
|
"eval_f1": 0.5308101410608023, |
|
"eval_loss": 0.6237577795982361, |
|
"eval_precision": 0.6518028517787527, |
|
"eval_recall": 0.5430883155707511, |
|
"eval_runtime": 5.972, |
|
"eval_samples_per_second": 167.95, |
|
"eval_steps_per_second": 10.549, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.9488193392753601, |
|
"learning_rate": 0.00045871559633027525, |
|
"loss": 0.6078, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"grad_norm": 0.8466370105743408, |
|
"learning_rate": 0.0004128440366972477, |
|
"loss": 0.6368, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": 1.245497465133667, |
|
"learning_rate": 0.00036697247706422024, |
|
"loss": 0.6639, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"grad_norm": 1.0719431638717651, |
|
"learning_rate": 0.0003211009174311927, |
|
"loss": 0.5846, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.8983196020126343, |
|
"learning_rate": 0.00027522935779816516, |
|
"loss": 0.663, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 0.8495015501976013, |
|
"learning_rate": 0.00022935779816513763, |
|
"loss": 0.5945, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"grad_norm": 1.3694357872009277, |
|
"learning_rate": 0.00018348623853211012, |
|
"loss": 0.6575, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 1.014473557472229, |
|
"learning_rate": 0.00013761467889908258, |
|
"loss": 0.6433, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"grad_norm": 1.0054584741592407, |
|
"learning_rate": 9.174311926605506e-05, |
|
"loss": 0.6797, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"grad_norm": 1.37436044216156, |
|
"learning_rate": 4.587155963302753e-05, |
|
"loss": 0.6579, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"grad_norm": 0.9838040471076965, |
|
"learning_rate": 0.0, |
|
"loss": 0.6145, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.7726819541375872, |
|
"eval_f1": 0.5282805571871881, |
|
"eval_loss": 0.6095667481422424, |
|
"eval_precision": 0.6426600715638663, |
|
"eval_recall": 0.5346360087577886, |
|
"eval_runtime": 6.0311, |
|
"eval_samples_per_second": 166.305, |
|
"eval_steps_per_second": 10.446, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"step": 1090, |
|
"total_flos": 5.440571948014866e+18, |
|
"train_loss": 0.7656277652180523, |
|
"train_runtime": 925.5702, |
|
"train_samples_per_second": 75.705, |
|
"train_steps_per_second": 1.178 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1090, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 5.440571948014866e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|