{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.914529914529915,
"eval_steps": 500,
"global_step": 870,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11396011396011396,
"grad_norm": 0.158817857503891,
"learning_rate": 0.0001999348095389677,
"loss": 0.9924,
"step": 10
},
{
"epoch": 0.22792022792022792,
"grad_norm": 0.21280939877033234,
"learning_rate": 0.000199739323151795,
"loss": 0.819,
"step": 20
},
{
"epoch": 0.3418803418803419,
"grad_norm": 0.22974510490894318,
"learning_rate": 0.00019941379571543596,
"loss": 0.767,
"step": 30
},
{
"epoch": 0.45584045584045585,
"grad_norm": 0.20720455050468445,
"learning_rate": 0.00019895865165556377,
"loss": 0.6948,
"step": 40
},
{
"epoch": 0.5698005698005698,
"grad_norm": 0.1902514398097992,
"learning_rate": 0.00019837448439320027,
"loss": 0.6509,
"step": 50
},
{
"epoch": 0.6837606837606838,
"grad_norm": 0.18335820734500885,
"learning_rate": 0.00019766205557100868,
"loss": 0.6344,
"step": 60
},
{
"epoch": 0.7977207977207977,
"grad_norm": 0.17900069057941437,
"learning_rate": 0.00019682229406025635,
"loss": 0.6447,
"step": 70
},
{
"epoch": 0.9116809116809117,
"grad_norm": 0.16915330290794373,
"learning_rate": 0.00019585629474974415,
"loss": 0.6335,
"step": 80
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.16036000847816467,
"learning_rate": 0.00019476531711828027,
"loss": 0.634,
"step": 90
},
{
"epoch": 1.1396011396011396,
"grad_norm": 0.16852639615535736,
"learning_rate": 0.0001935507835925601,
"loss": 0.6058,
"step": 100
},
{
"epoch": 1.2535612535612537,
"grad_norm": 0.15856905281543732,
"learning_rate": 0.00019221427769259333,
"loss": 0.5902,
"step": 110
},
{
"epoch": 1.3675213675213675,
"grad_norm": 0.16909192502498627,
"learning_rate": 0.00019075754196709572,
"loss": 0.6051,
"step": 120
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.1899166703224182,
"learning_rate": 0.00018918247572153823,
"loss": 0.6098,
"step": 130
},
{
"epoch": 1.5954415954415955,
"grad_norm": 0.17596793174743652,
"learning_rate": 0.00018749113254181498,
"loss": 0.597,
"step": 140
},
{
"epoch": 1.7094017094017095,
"grad_norm": 0.16560517251491547,
"learning_rate": 0.00018568571761675893,
"loss": 0.5899,
"step": 150
},
{
"epoch": 1.8233618233618234,
"grad_norm": 0.16513986885547638,
"learning_rate": 0.00018376858486299647,
"loss": 0.5989,
"step": 160
},
{
"epoch": 1.9373219373219372,
"grad_norm": 0.20360782742500305,
"learning_rate": 0.00018174223385588917,
"loss": 0.5982,
"step": 170
},
{
"epoch": 2.051282051282051,
"grad_norm": 0.16155321896076202,
"learning_rate": 0.00017960930657056438,
"loss": 0.593,
"step": 180
},
{
"epoch": 2.1652421652421654,
"grad_norm": 0.1811763048171997,
"learning_rate": 0.00017737258393728364,
"loss": 0.6077,
"step": 190
},
{
"epoch": 2.2792022792022792,
"grad_norm": 0.16952063143253326,
"learning_rate": 0.00017503498221564025,
"loss": 0.5749,
"step": 200
},
{
"epoch": 2.393162393162393,
"grad_norm": 0.17240603268146515,
"learning_rate": 0.0001725995491923131,
"loss": 0.5592,
"step": 210
},
{
"epoch": 2.5071225071225074,
"grad_norm": 0.1657334417104721,
"learning_rate": 0.00017006946020733425,
"loss": 0.5779,
"step": 220
},
{
"epoch": 2.6210826210826212,
"grad_norm": 0.16417497396469116,
"learning_rate": 0.0001674480140140514,
"loss": 0.5675,
"step": 230
},
{
"epoch": 2.735042735042735,
"grad_norm": 0.174308180809021,
"learning_rate": 0.00016473862847818277,
"loss": 0.5977,
"step": 240
},
{
"epoch": 2.849002849002849,
"grad_norm": 0.17116901278495789,
"learning_rate": 0.0001619448361215723,
"loss": 0.5582,
"step": 250
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.16816489398479462,
"learning_rate": 0.0001590702795164551,
"loss": 0.5813,
"step": 260
},
{
"epoch": 3.076923076923077,
"grad_norm": 0.17530137300491333,
"learning_rate": 0.00015611870653623825,
"loss": 0.559,
"step": 270
},
{
"epoch": 3.190883190883191,
"grad_norm": 0.1744232326745987,
"learning_rate": 0.0001530939654689887,
"loss": 0.5668,
"step": 280
},
{
"epoch": 3.304843304843305,
"grad_norm": 0.1809006929397583,
"learning_rate": 0.00015000000000000001,
"loss": 0.5754,
"step": 290
},
{
"epoch": 3.4188034188034186,
"grad_norm": 0.16484159231185913,
"learning_rate": 0.00014684084406997903,
"loss": 0.5731,
"step": 300
},
{
"epoch": 3.532763532763533,
"grad_norm": 0.19075918197631836,
"learning_rate": 0.00014362061661555675,
"loss": 0.5496,
"step": 310
},
{
"epoch": 3.646723646723647,
"grad_norm": 0.18451079726219177,
"learning_rate": 0.00014034351619898088,
"loss": 0.5463,
"step": 320
},
{
"epoch": 3.7606837606837606,
"grad_norm": 0.18566997349262238,
"learning_rate": 0.00013701381553399145,
"loss": 0.5768,
"step": 330
},
{
"epoch": 3.8746438746438745,
"grad_norm": 0.1669853925704956,
"learning_rate": 0.0001336358559150175,
"loss": 0.5606,
"step": 340
},
{
"epoch": 3.9886039886039883,
"grad_norm": 0.17847082018852234,
"learning_rate": 0.00013021404155695725,
"loss": 0.5756,
"step": 350
},
{
"epoch": 4.102564102564102,
"grad_norm": 0.16660483181476593,
"learning_rate": 0.00012675283385292212,
"loss": 0.5585,
"step": 360
},
{
"epoch": 4.216524216524217,
"grad_norm": 0.17163340747356415,
"learning_rate": 0.00012325674555743106,
"loss": 0.5434,
"step": 370
},
{
"epoch": 4.330484330484331,
"grad_norm": 0.16264410316944122,
"learning_rate": 0.00011973033490264001,
"loss": 0.5449,
"step": 380
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.17614829540252686,
"learning_rate": 0.0001161781996552765,
"loss": 0.5574,
"step": 390
},
{
"epoch": 4.5584045584045585,
"grad_norm": 0.19437584280967712,
"learning_rate": 0.00011260497112202895,
"loss": 0.5448,
"step": 400
},
{
"epoch": 4.672364672364672,
"grad_norm": 0.19045701622962952,
"learning_rate": 0.00010901530811120655,
"loss": 0.5474,
"step": 410
},
{
"epoch": 4.786324786324786,
"grad_norm": 0.21330882608890533,
"learning_rate": 0.00010541389085854176,
"loss": 0.5552,
"step": 420
},
{
"epoch": 4.9002849002849,
"grad_norm": 0.17429402470588684,
"learning_rate": 0.00010180541492505604,
"loss": 0.5495,
"step": 430
},
{
"epoch": 5.014245014245014,
"grad_norm": 0.17785826325416565,
"learning_rate": 9.819458507494394e-05,
"loss": 0.5583,
"step": 440
},
{
"epoch": 5.128205128205128,
"grad_norm": 0.19076977670192719,
"learning_rate": 9.458610914145826e-05,
"loss": 0.5291,
"step": 450
},
{
"epoch": 5.2421652421652425,
"grad_norm": 0.19988471269607544,
"learning_rate": 9.098469188879349e-05,
"loss": 0.5311,
"step": 460
},
{
"epoch": 5.356125356125356,
"grad_norm": 0.19638335704803467,
"learning_rate": 8.739502887797107e-05,
"loss": 0.5684,
"step": 470
},
{
"epoch": 5.47008547008547,
"grad_norm": 0.2043437659740448,
"learning_rate": 8.382180034472353e-05,
"loss": 0.5371,
"step": 480
},
{
"epoch": 5.584045584045584,
"grad_norm": 0.2045976221561432,
"learning_rate": 8.026966509736001e-05,
"loss": 0.5307,
"step": 490
},
{
"epoch": 5.698005698005698,
"grad_norm": 0.21237310767173767,
"learning_rate": 7.674325444256899e-05,
"loss": 0.5483,
"step": 500
},
{
"epoch": 5.811965811965812,
"grad_norm": 0.22306476533412933,
"learning_rate": 7.324716614707793e-05,
"loss": 0.5572,
"step": 510
},
{
"epoch": 5.925925925925926,
"grad_norm": 0.20065273344516754,
"learning_rate": 6.978595844304271e-05,
"loss": 0.5363,
"step": 520
},
{
"epoch": 6.0398860398860394,
"grad_norm": 0.21213628351688385,
"learning_rate": 6.636414408498249e-05,
"loss": 0.521,
"step": 530
},
{
"epoch": 6.153846153846154,
"grad_norm": 0.1936779022216797,
"learning_rate": 6.298618446600856e-05,
"loss": 0.5283,
"step": 540
},
{
"epoch": 6.267806267806268,
"grad_norm": 0.19564631581306458,
"learning_rate": 5.965648380101916e-05,
"loss": 0.5301,
"step": 550
},
{
"epoch": 6.381766381766382,
"grad_norm": 0.20069913566112518,
"learning_rate": 5.6379383384443255e-05,
"loss": 0.5204,
"step": 560
},
{
"epoch": 6.495726495726496,
"grad_norm": 0.21325626969337463,
"learning_rate": 5.3159155930021e-05,
"loss": 0.5419,
"step": 570
},
{
"epoch": 6.60968660968661,
"grad_norm": 0.21303197741508484,
"learning_rate": 5.000000000000002e-05,
"loss": 0.543,
"step": 580
},
{
"epoch": 6.7236467236467234,
"grad_norm": 0.21136346459388733,
"learning_rate": 4.6906034531011346e-05,
"loss": 0.5217,
"step": 590
},
{
"epoch": 6.837606837606837,
"grad_norm": 0.21392931044101715,
"learning_rate": 4.388129346376178e-05,
"loss": 0.5288,
"step": 600
},
{
"epoch": 6.951566951566951,
"grad_norm": 0.22880437970161438,
"learning_rate": 4.092972048354491e-05,
"loss": 0.5273,
"step": 610
},
{
"epoch": 7.065527065527066,
"grad_norm": 0.21491903066635132,
"learning_rate": 3.80551638784277e-05,
"loss": 0.5332,
"step": 620
},
{
"epoch": 7.17948717948718,
"grad_norm": 0.26633119583129883,
"learning_rate": 3.5261371521817244e-05,
"loss": 0.5239,
"step": 630
},
{
"epoch": 7.293447293447294,
"grad_norm": 0.23685385286808014,
"learning_rate": 3.2551985985948616e-05,
"loss": 0.5309,
"step": 640
},
{
"epoch": 7.407407407407407,
"grad_norm": 0.22292840480804443,
"learning_rate": 2.993053979266577e-05,
"loss": 0.5372,
"step": 650
},
{
"epoch": 7.521367521367521,
"grad_norm": 0.2220107614994049,
"learning_rate": 2.7400450807686938e-05,
"loss": 0.5083,
"step": 660
},
{
"epoch": 7.635327635327635,
"grad_norm": 0.2191537618637085,
"learning_rate": 2.496501778435977e-05,
"loss": 0.5164,
"step": 670
},
{
"epoch": 7.749287749287749,
"grad_norm": 0.22593119740486145,
"learning_rate": 2.2627416062716366e-05,
"loss": 0.5152,
"step": 680
},
{
"epoch": 7.863247863247864,
"grad_norm": 0.23532789945602417,
"learning_rate": 2.0390693429435627e-05,
"loss": 0.5269,
"step": 690
},
{
"epoch": 7.977207977207978,
"grad_norm": 0.25111591815948486,
"learning_rate": 1.825776614411082e-05,
"loss": 0.5335,
"step": 700
},
{
"epoch": 8.091168091168091,
"grad_norm": 0.21956747770309448,
"learning_rate": 1.6231415137003537e-05,
"loss": 0.5144,
"step": 710
},
{
"epoch": 8.205128205128204,
"grad_norm": 0.23355403542518616,
"learning_rate": 1.4314282383241096e-05,
"loss": 0.5294,
"step": 720
},
{
"epoch": 8.31908831908832,
"grad_norm": 0.23712006211280823,
"learning_rate": 1.2508867458185037e-05,
"loss": 0.5229,
"step": 730
},
{
"epoch": 8.433048433048434,
"grad_norm": 0.22506175935268402,
"learning_rate": 1.0817524278461776e-05,
"loss": 0.5212,
"step": 740
},
{
"epoch": 8.547008547008547,
"grad_norm": 0.21853385865688324,
"learning_rate": 9.242458032904311e-06,
"loss": 0.5193,
"step": 750
},
{
"epoch": 8.660968660968662,
"grad_norm": 0.23257511854171753,
"learning_rate": 7.785722307406684e-06,
"loss": 0.5039,
"step": 760
},
{
"epoch": 8.774928774928775,
"grad_norm": 0.21563945710659027,
"learning_rate": 6.4492164074399065e-06,
"loss": 0.5232,
"step": 770
},
{
"epoch": 8.88888888888889,
"grad_norm": 0.22108329832553864,
"learning_rate": 5.2346828817197655e-06,
"loss": 0.5309,
"step": 780
},
{
"epoch": 9.002849002849002,
"grad_norm": 0.22330021858215332,
"learning_rate": 4.143705250255869e-06,
"loss": 0.5287,
"step": 790
},
{
"epoch": 9.116809116809117,
"grad_norm": 0.22394247353076935,
"learning_rate": 3.1777059397436692e-06,
"loss": 0.5007,
"step": 800
},
{
"epoch": 9.23076923076923,
"grad_norm": 0.2144930511713028,
"learning_rate": 2.3379444289913342e-06,
"loss": 0.5277,
"step": 810
},
{
"epoch": 9.344729344729345,
"grad_norm": 0.2214236557483673,
"learning_rate": 1.6255156067997323e-06,
"loss": 0.5173,
"step": 820
},
{
"epoch": 9.45868945868946,
"grad_norm": 0.2192196100950241,
"learning_rate": 1.0413483444362771e-06,
"loss": 0.5123,
"step": 830
},
{
"epoch": 9.572649572649572,
"grad_norm": 0.22837017476558685,
"learning_rate": 5.862042845640403e-07,
"loss": 0.5279,
"step": 840
},
{
"epoch": 9.686609686609687,
"grad_norm": 0.21172335743904114,
"learning_rate": 2.606768482050215e-07,
"loss": 0.5263,
"step": 850
},
{
"epoch": 9.8005698005698,
"grad_norm": 0.23530949652194977,
"learning_rate": 6.519046103230508e-08,
"loss": 0.5202,
"step": 860
},
{
"epoch": 9.914529914529915,
"grad_norm": 0.23058444261550903,
"learning_rate": 0.0,
"loss": 0.5243,
"step": 870
},
{
"epoch": 9.914529914529915,
"step": 870,
"total_flos": 5.67984355540992e+16,
"train_loss": 0.5655439464525245,
"train_runtime": 2716.1071,
"train_samples_per_second": 1.292,
"train_steps_per_second": 0.32
}
],
"logging_steps": 10,
"max_steps": 870,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 5.67984355540992e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}