{
"best_metric": 0.21592645,
"best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v6-20250103-052220/checkpoint-414",
"epoch": 3.0,
"eval_steps": 200,
"global_step": 414,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.95261294,
"epoch": 0.007272727272727273,
"grad_norm": 1.9728064130980805,
"learning_rate": 0.0,
"loss": 0.22823659,
"memory(GiB)": 31.98,
"step": 1,
"train_speed(iter/s)": 0.054965
},
{
"acc": 0.93504357,
"epoch": 0.03636363636363636,
"grad_norm": 2.237331671502764,
"learning_rate": 4.526808302869428e-06,
"loss": 0.35742611,
"memory(GiB)": 33.01,
"step": 5,
"train_speed(iter/s)": 0.120168
},
{
"acc": 0.91343489,
"epoch": 0.07272727272727272,
"grad_norm": 3.274625633110072,
"learning_rate": 6.476398521807292e-06,
"loss": 0.45717983,
"memory(GiB)": 33.01,
"step": 10,
"train_speed(iter/s)": 0.14108
},
{
"acc": 0.93629456,
"epoch": 0.10909090909090909,
"grad_norm": 3.424189078760059,
"learning_rate": 7.6168356916586906e-06,
"loss": 0.34272318,
"memory(GiB)": 33.01,
"step": 15,
"train_speed(iter/s)": 0.149645
},
{
"acc": 0.92510157,
"epoch": 0.14545454545454545,
"grad_norm": 2.2347104663373494,
"learning_rate": 8.425988740745155e-06,
"loss": 0.38463933,
"memory(GiB)": 33.01,
"step": 20,
"train_speed(iter/s)": 0.154322
},
{
"acc": 0.92444115,
"epoch": 0.18181818181818182,
"grad_norm": 2.999692166672811,
"learning_rate": 9.053616605738856e-06,
"loss": 0.38348885,
"memory(GiB)": 33.01,
"step": 25,
"train_speed(iter/s)": 0.15729
},
{
"acc": 0.89717693,
"epoch": 0.21818181818181817,
"grad_norm": 3.151051933103401,
"learning_rate": 9.566425910596553e-06,
"loss": 0.50090494,
"memory(GiB)": 33.01,
"step": 30,
"train_speed(iter/s)": 0.159318
},
{
"acc": 0.8918396,
"epoch": 0.2545454545454545,
"grad_norm": 3.929292818540404,
"learning_rate": 9.999999999999999e-06,
"loss": 0.53159599,
"memory(GiB)": 33.01,
"step": 35,
"train_speed(iter/s)": 0.160758
},
{
"acc": 0.91250048,
"epoch": 0.2909090909090909,
"grad_norm": 2.9644423662409465,
"learning_rate": 9.998540216393926e-06,
"loss": 0.44619522,
"memory(GiB)": 33.01,
"step": 40,
"train_speed(iter/s)": 0.161864
},
{
"acc": 0.91625786,
"epoch": 0.32727272727272727,
"grad_norm": 2.8793186353393256,
"learning_rate": 9.994161718048217e-06,
"loss": 0.46977191,
"memory(GiB)": 33.01,
"step": 45,
"train_speed(iter/s)": 0.162757
},
{
"acc": 0.91005774,
"epoch": 0.36363636363636365,
"grad_norm": 2.649498967933029,
"learning_rate": 9.986867061882612e-06,
"loss": 0.4955514,
"memory(GiB)": 33.01,
"step": 50,
"train_speed(iter/s)": 0.163456
},
{
"acc": 0.90930538,
"epoch": 0.4,
"grad_norm": 3.239689305737002,
"learning_rate": 9.976660507770886e-06,
"loss": 0.48896346,
"memory(GiB)": 33.01,
"step": 55,
"train_speed(iter/s)": 0.164017
},
{
"acc": 0.89578419,
"epoch": 0.43636363636363634,
"grad_norm": 4.172977184581541,
"learning_rate": 9.963548016053221e-06,
"loss": 0.53288736,
"memory(GiB)": 33.01,
"step": 60,
"train_speed(iter/s)": 0.164492
},
{
"acc": 0.90814514,
"epoch": 0.4727272727272727,
"grad_norm": 2.4035166321477672,
"learning_rate": 9.94753724405553e-06,
"loss": 0.43980942,
"memory(GiB)": 33.01,
"step": 65,
"train_speed(iter/s)": 0.164893
},
{
"acc": 0.9009388,
"epoch": 0.509090909090909,
"grad_norm": 2.792817778420491,
"learning_rate": 9.92863754161779e-06,
"loss": 0.52926388,
"memory(GiB)": 33.01,
"step": 70,
"train_speed(iter/s)": 0.165233
},
{
"acc": 0.90013046,
"epoch": 0.5454545454545454,
"grad_norm": 2.6334892567714436,
"learning_rate": 9.906859945633999e-06,
"loss": 0.49731359,
"memory(GiB)": 33.01,
"step": 75,
"train_speed(iter/s)": 0.165533
},
{
"acc": 0.9045433,
"epoch": 0.5818181818181818,
"grad_norm": 2.332185471916977,
"learning_rate": 9.882217173606948e-06,
"loss": 0.51631742,
"memory(GiB)": 33.01,
"step": 80,
"train_speed(iter/s)": 0.165793
},
{
"acc": 0.92099228,
"epoch": 0.6181818181818182,
"grad_norm": 2.3620310828105935,
"learning_rate": 9.854723616221548e-06,
"loss": 0.41591549,
"memory(GiB)": 33.01,
"step": 85,
"train_speed(iter/s)": 0.166024
},
{
"acc": 0.91060734,
"epoch": 0.6545454545454545,
"grad_norm": 2.2844767249274214,
"learning_rate": 9.824395328941086e-06,
"loss": 0.44734068,
"memory(GiB)": 33.01,
"step": 90,
"train_speed(iter/s)": 0.16623
},
{
"acc": 0.91947842,
"epoch": 0.6909090909090909,
"grad_norm": 2.654847718943916,
"learning_rate": 9.791250022631277e-06,
"loss": 0.4483614,
"memory(GiB)": 33.01,
"step": 95,
"train_speed(iter/s)": 0.166415
},
{
"acc": 0.90607023,
"epoch": 0.7272727272727273,
"grad_norm": 2.846204394974626,
"learning_rate": 9.755307053217622e-06,
"loss": 0.47331667,
"memory(GiB)": 33.01,
"step": 100,
"train_speed(iter/s)": 0.16658
},
{
"acc": 0.91776829,
"epoch": 0.7636363636363637,
"grad_norm": 4.193055969054884,
"learning_rate": 9.716587410382087e-06,
"loss": 0.45284424,
"memory(GiB)": 33.01,
"step": 105,
"train_speed(iter/s)": 0.166736
},
{
"acc": 0.90584297,
"epoch": 0.8,
"grad_norm": 2.3839939588937082,
"learning_rate": 9.675113705305733e-06,
"loss": 0.49337497,
"memory(GiB)": 33.01,
"step": 110,
"train_speed(iter/s)": 0.166886
},
{
"acc": 0.90228806,
"epoch": 0.8363636363636363,
"grad_norm": 2.6045227101709214,
"learning_rate": 9.630910157464404e-06,
"loss": 0.48910527,
"memory(GiB)": 33.01,
"step": 115,
"train_speed(iter/s)": 0.16701
},
{
"acc": 0.90632572,
"epoch": 0.8727272727272727,
"grad_norm": 2.1215112041093906,
"learning_rate": 9.584002580485256e-06,
"loss": 0.4827064,
"memory(GiB)": 33.01,
"step": 120,
"train_speed(iter/s)": 0.16713
},
{
"acc": 0.91771622,
"epoch": 0.9090909090909091,
"grad_norm": 2.8141571001065424,
"learning_rate": 9.534418367072303e-06,
"loss": 0.40849586,
"memory(GiB)": 33.01,
"step": 125,
"train_speed(iter/s)": 0.167246
},
{
"acc": 0.89680672,
"epoch": 0.9454545454545454,
"grad_norm": 2.048597621204809,
"learning_rate": 9.482186473009871e-06,
"loss": 0.52411914,
"memory(GiB)": 33.01,
"step": 130,
"train_speed(iter/s)": 0.167353
},
{
"acc": 0.90298738,
"epoch": 0.9818181818181818,
"grad_norm": 2.887900153774404,
"learning_rate": 9.427337400253224e-06,
"loss": 0.5198132,
"memory(GiB)": 33.01,
"step": 135,
"train_speed(iter/s)": 0.167445
},
{
"epoch": 1.0,
"eval_acc": 0.9165654110976104,
"eval_loss": 0.2242395281791687,
"eval_runtime": 12.914,
"eval_samples_per_second": 8.983,
"eval_steps_per_second": 1.162,
"step": 138
},
{
"acc": 0.8111371,
"epoch": 1.0145454545454546,
"grad_norm": 2.8557900772261893,
"learning_rate": 9.381595246879296e-06,
"loss": 0.41494226,
"memory(GiB)": 33.01,
"step": 140,
"train_speed(iter/s)": 0.155909
},
{
"acc": 0.92820644,
"epoch": 1.050909090909091,
"grad_norm": 3.457089179525002,
"learning_rate": 9.322116984002575e-06,
"loss": 0.36117687,
"memory(GiB)": 33.01,
"step": 145,
"train_speed(iter/s)": 0.15618
},
{
"acc": 0.91620007,
"epoch": 1.0872727272727274,
"grad_norm": 2.202599146322218,
"learning_rate": 9.260115018505599e-06,
"loss": 0.44102664,
"memory(GiB)": 33.01,
"step": 150,
"train_speed(iter/s)": 0.156606
},
{
"acc": 0.91883535,
"epoch": 1.1236363636363635,
"grad_norm": 2.3114291842279697,
"learning_rate": 9.195625557790217e-06,
"loss": 0.41582127,
"memory(GiB)": 33.01,
"step": 155,
"train_speed(iter/s)": 0.156919
},
{
"acc": 0.93434258,
"epoch": 1.16,
"grad_norm": 5.406474002236419,
"learning_rate": 9.128686261885441e-06,
"loss": 0.35148592,
"memory(GiB)": 33.01,
"step": 160,
"train_speed(iter/s)": 0.157294
},
{
"acc": 0.91467266,
"epoch": 1.1963636363636363,
"grad_norm": 3.861068773297179,
"learning_rate": 9.059336221455045e-06,
"loss": 0.42474766,
"memory(GiB)": 33.01,
"step": 165,
"train_speed(iter/s)": 0.157643
},
{
"acc": 0.91754112,
"epoch": 1.2327272727272727,
"grad_norm": 3.4508767564011182,
"learning_rate": 8.987615934969708e-06,
"loss": 0.44358654,
"memory(GiB)": 33.01,
"step": 170,
"train_speed(iter/s)": 0.157971
},
{
"acc": 0.89289169,
"epoch": 1.269090909090909,
"grad_norm": 2.9350868391082434,
"learning_rate": 8.913567285057077e-06,
"loss": 0.51100779,
"memory(GiB)": 33.01,
"step": 175,
"train_speed(iter/s)": 0.158289
},
{
"acc": 0.91759186,
"epoch": 1.3054545454545454,
"grad_norm": 2.3549681642790072,
"learning_rate": 8.837233514043455e-06,
"loss": 0.40699306,
"memory(GiB)": 33.01,
"step": 180,
"train_speed(iter/s)": 0.158589
},
{
"acc": 0.9234314,
"epoch": 1.3418181818181818,
"grad_norm": 2.097178456402854,
"learning_rate": 8.758659198701528e-06,
"loss": 0.38527017,
"memory(GiB)": 33.01,
"step": 185,
"train_speed(iter/s)": 0.158868
},
{
"acc": 0.90927277,
"epoch": 1.3781818181818182,
"grad_norm": 2.0240919955649277,
"learning_rate": 8.677890224218774e-06,
"loss": 0.43820124,
"memory(GiB)": 33.01,
"step": 190,
"train_speed(iter/s)": 0.159135
},
{
"acc": 0.92771797,
"epoch": 1.4145454545454546,
"grad_norm": 1.8263954768378603,
"learning_rate": 8.594973757401804e-06,
"loss": 0.38704326,
"memory(GiB)": 33.01,
"step": 195,
"train_speed(iter/s)": 0.159384
},
{
"acc": 0.92044001,
"epoch": 1.450909090909091,
"grad_norm": 2.1904738367354772,
"learning_rate": 8.50995821913228e-06,
"loss": 0.40301366,
"memory(GiB)": 33.01,
"step": 200,
"train_speed(iter/s)": 0.159623
},
{
"acc": 0.91810284,
"epoch": 1.4872727272727273,
"grad_norm": 2.322191659362731,
"learning_rate": 8.42289325609048e-06,
"loss": 0.42446461,
"memory(GiB)": 33.01,
"step": 205,
"train_speed(iter/s)": 0.159854
},
{
"acc": 0.91816349,
"epoch": 1.5236363636363637,
"grad_norm": 3.6766273070353996,
"learning_rate": 8.333829711763038e-06,
"loss": 0.40156956,
"memory(GiB)": 33.01,
"step": 210,
"train_speed(iter/s)": 0.160076
},
{
"acc": 0.9263833,
"epoch": 1.56,
"grad_norm": 4.222791406049376,
"learning_rate": 8.242819596751778e-06,
"loss": 0.40653038,
"memory(GiB)": 33.01,
"step": 215,
"train_speed(iter/s)": 0.160284
},
{
"acc": 0.90884829,
"epoch": 1.5963636363636362,
"grad_norm": 2.774384141884033,
"learning_rate": 8.149916058400986e-06,
"loss": 0.4541821,
"memory(GiB)": 33.01,
"step": 220,
"train_speed(iter/s)": 0.160482
},
{
"acc": 0.92306767,
"epoch": 1.6327272727272728,
"grad_norm": 1.8808774186906907,
"learning_rate": 8.055173349760858e-06,
"loss": 0.38071015,
"memory(GiB)": 33.01,
"step": 225,
"train_speed(iter/s)": 0.160675
},
{
"acc": 0.92462444,
"epoch": 1.669090909090909,
"grad_norm": 2.6660330848450147,
"learning_rate": 7.958646797905251e-06,
"loss": 0.40280228,
"memory(GiB)": 33.01,
"step": 230,
"train_speed(iter/s)": 0.160867
},
{
"acc": 0.90507927,
"epoch": 1.7054545454545456,
"grad_norm": 2.396279628543684,
"learning_rate": 7.860392771622222e-06,
"loss": 0.49732876,
"memory(GiB)": 33.01,
"step": 235,
"train_speed(iter/s)": 0.161052
},
{
"acc": 0.90813217,
"epoch": 1.7418181818181817,
"grad_norm": 1.959098632103441,
"learning_rate": 7.760468648496251e-06,
"loss": 0.45160952,
"memory(GiB)": 33.01,
"step": 240,
"train_speed(iter/s)": 0.161223
},
{
"acc": 0.90353422,
"epoch": 1.7781818181818183,
"grad_norm": 3.350380496148853,
"learning_rate": 7.658932781401341e-06,
"loss": 0.49037123,
"memory(GiB)": 33.01,
"step": 245,
"train_speed(iter/s)": 0.161388
},
{
"acc": 0.91233768,
"epoch": 1.8145454545454545,
"grad_norm": 2.375193909280913,
"learning_rate": 7.5558444644245855e-06,
"loss": 0.45447264,
"memory(GiB)": 33.01,
"step": 250,
"train_speed(iter/s)": 0.161547
},
{
"acc": 0.90856533,
"epoch": 1.850909090909091,
"grad_norm": 2.7061925296961937,
"learning_rate": 7.451263898240091e-06,
"loss": 0.47929668,
"memory(GiB)": 33.01,
"step": 255,
"train_speed(iter/s)": 0.161704
},
{
"acc": 0.92107229,
"epoch": 1.8872727272727272,
"grad_norm": 3.242205419295514,
"learning_rate": 7.345252154953482e-06,
"loss": 0.39586713,
"memory(GiB)": 33.01,
"step": 260,
"train_speed(iter/s)": 0.161853
},
{
"acc": 0.91105738,
"epoch": 1.9236363636363636,
"grad_norm": 3.5795963065620744,
"learning_rate": 7.2378711424375056e-06,
"loss": 0.46428795,
"memory(GiB)": 33.01,
"step": 265,
"train_speed(iter/s)": 0.161996
},
{
"acc": 0.92524033,
"epoch": 1.96,
"grad_norm": 3.652004803975923,
"learning_rate": 7.129183568179582e-06,
"loss": 0.34946482,
"memory(GiB)": 33.01,
"step": 270,
"train_speed(iter/s)": 0.162127
},
{
"acc": 0.91721325,
"epoch": 1.9963636363636363,
"grad_norm": 2.386708723548124,
"learning_rate": 7.019252902662391e-06,
"loss": 0.45530472,
"memory(GiB)": 33.01,
"step": 275,
"train_speed(iter/s)": 0.162264
},
{
"epoch": 2.0,
"eval_acc": 0.9177804779262859,
"eval_loss": 0.2162669152021408,
"eval_runtime": 12.9627,
"eval_samples_per_second": 8.949,
"eval_steps_per_second": 1.157,
"step": 276
},
{
"acc": 0.83070297,
"epoch": 2.0290909090909093,
"grad_norm": 2.535544826305509,
"learning_rate": 6.908143342298905e-06,
"loss": 0.36057291,
"memory(GiB)": 33.01,
"step": 280,
"train_speed(iter/s)": 0.156683
},
{
"acc": 0.93970337,
"epoch": 2.0654545454545454,
"grad_norm": 2.276921539430186,
"learning_rate": 6.795919771943491e-06,
"loss": 0.32403946,
"memory(GiB)": 33.01,
"step": 285,
"train_speed(iter/s)": 0.156901
},
{
"acc": 0.92168732,
"epoch": 2.101818181818182,
"grad_norm": 4.127638603634917,
"learning_rate": 6.682647727000975e-06,
"loss": 0.41061325,
"memory(GiB)": 33.01,
"step": 290,
"train_speed(iter/s)": 0.157112
},
{
"acc": 0.92185087,
"epoch": 2.138181818181818,
"grad_norm": 2.613159698814904,
"learning_rate": 6.5683933551558196e-06,
"loss": 0.41276412,
"memory(GiB)": 33.01,
"step": 295,
"train_speed(iter/s)": 0.157311
},
{
"acc": 0.93119574,
"epoch": 2.174545454545455,
"grad_norm": 2.4146244471425997,
"learning_rate": 6.4532233777437355e-06,
"loss": 0.34114873,
"memory(GiB)": 33.01,
"step": 300,
"train_speed(iter/s)": 0.157504
},
{
"acc": 0.92488451,
"epoch": 2.210909090909091,
"grad_norm": 2.907208779112958,
"learning_rate": 6.337205050788301e-06,
"loss": 0.38522263,
"memory(GiB)": 33.01,
"step": 305,
"train_speed(iter/s)": 0.15769
},
{
"acc": 0.92167654,
"epoch": 2.247272727272727,
"grad_norm": 2.9896138007592024,
"learning_rate": 6.220406125725334e-06,
"loss": 0.37358305,
"memory(GiB)": 33.01,
"step": 310,
"train_speed(iter/s)": 0.15787
},
{
"acc": 0.9312851,
"epoch": 2.2836363636363637,
"grad_norm": 4.445824844631244,
"learning_rate": 6.102894809837971e-06,
"loss": 0.36833365,
"memory(GiB)": 33.01,
"step": 315,
"train_speed(iter/s)": 0.158047
},
{
"acc": 0.93003368,
"epoch": 2.32,
"grad_norm": 2.3181584670775464,
"learning_rate": 5.984739726425528e-06,
"loss": 0.36130757,
"memory(GiB)": 33.01,
"step": 320,
"train_speed(iter/s)": 0.15822
},
{
"acc": 0.91338739,
"epoch": 2.3563636363636364,
"grad_norm": 2.3002867827492235,
"learning_rate": 5.866009874729421e-06,
"loss": 0.44016724,
"memory(GiB)": 33.01,
"step": 325,
"train_speed(iter/s)": 0.158388
},
{
"acc": 0.93581047,
"epoch": 2.3927272727272726,
"grad_norm": 2.212658837030889,
"learning_rate": 5.746774589639565e-06,
"loss": 0.3611378,
"memory(GiB)": 33.01,
"step": 330,
"train_speed(iter/s)": 0.158548
},
{
"acc": 0.91852398,
"epoch": 2.429090909090909,
"grad_norm": 3.5098558925283343,
"learning_rate": 5.6271035012047395e-06,
"loss": 0.4051528,
"memory(GiB)": 33.01,
"step": 335,
"train_speed(iter/s)": 0.158706
},
{
"acc": 0.92445469,
"epoch": 2.4654545454545453,
"grad_norm": 2.3254480753766833,
"learning_rate": 5.507066493970609e-06,
"loss": 0.38657694,
"memory(GiB)": 33.01,
"step": 340,
"train_speed(iter/s)": 0.158859
},
{
"acc": 0.94319763,
"epoch": 2.501818181818182,
"grad_norm": 2.546749274650209,
"learning_rate": 5.386733666169105e-06,
"loss": 0.3047235,
"memory(GiB)": 33.01,
"step": 345,
"train_speed(iter/s)": 0.159007
},
{
"acc": 0.92226295,
"epoch": 2.538181818181818,
"grad_norm": 2.061710328743839,
"learning_rate": 5.266175288783046e-06,
"loss": 0.38313189,
"memory(GiB)": 33.01,
"step": 350,
"train_speed(iter/s)": 0.159149
},
{
"acc": 0.93342876,
"epoch": 2.5745454545454547,
"grad_norm": 3.3978986222055916,
"learning_rate": 5.1454617645098595e-06,
"loss": 0.32807801,
"memory(GiB)": 33.01,
"step": 355,
"train_speed(iter/s)": 0.159288
},
{
"acc": 0.92254868,
"epoch": 2.610909090909091,
"grad_norm": 2.36401523000578,
"learning_rate": 5.024663586648378e-06,
"loss": 0.37164879,
"memory(GiB)": 33.01,
"step": 360,
"train_speed(iter/s)": 0.159425
},
{
"acc": 0.93187628,
"epoch": 2.6472727272727274,
"grad_norm": 2.0346344864902726,
"learning_rate": 4.903851297932749e-06,
"loss": 0.35205598,
"memory(GiB)": 33.01,
"step": 365,
"train_speed(iter/s)": 0.159489
},
{
"acc": 0.92569065,
"epoch": 2.6836363636363636,
"grad_norm": 2.7461167183892883,
"learning_rate": 4.783095449337462e-06,
"loss": 0.35683942,
"memory(GiB)": 33.01,
"step": 370,
"train_speed(iter/s)": 0.159622
},
{
"acc": 0.92851734,
"epoch": 2.7199999999999998,
"grad_norm": 2.7763819290375316,
"learning_rate": 4.6624665588775675e-06,
"loss": 0.35814347,
"memory(GiB)": 33.01,
"step": 375,
"train_speed(iter/s)": 0.159722
},
{
"acc": 0.94960651,
"epoch": 2.7563636363636363,
"grad_norm": 2.4971767216968956,
"learning_rate": 4.542035070428136e-06,
"loss": 0.27659984,
"memory(GiB)": 33.01,
"step": 380,
"train_speed(iter/s)": 0.159847
},
{
"acc": 0.92807074,
"epoch": 2.792727272727273,
"grad_norm": 2.683132297237531,
"learning_rate": 4.4218713125870236e-06,
"loss": 0.37527924,
"memory(GiB)": 33.01,
"step": 385,
"train_speed(iter/s)": 0.159969
},
{
"acc": 0.93306198,
"epoch": 2.829090909090909,
"grad_norm": 2.295750889124929,
"learning_rate": 4.302045457604953e-06,
"loss": 0.33568211,
"memory(GiB)": 33.01,
"step": 390,
"train_speed(iter/s)": 0.160089
},
{
"acc": 0.92898979,
"epoch": 2.8654545454545453,
"grad_norm": 2.07143338080191,
"learning_rate": 4.182627480406894e-06,
"loss": 0.34494858,
"memory(GiB)": 33.01,
"step": 395,
"train_speed(iter/s)": 0.160203
},
{
"acc": 0.92927322,
"epoch": 2.901818181818182,
"grad_norm": 3.061148841919245,
"learning_rate": 4.063687117728671e-06,
"loss": 0.37803557,
"memory(GiB)": 33.01,
"step": 400,
"train_speed(iter/s)": 0.160316
},
{
"acc": 0.93406506,
"epoch": 2.9381818181818184,
"grad_norm": 2.3908961716983748,
"learning_rate": 3.94529382739268e-06,
"loss": 0.32702701,
"memory(GiB)": 33.01,
"step": 405,
"train_speed(iter/s)": 0.160429
},
{
"acc": 0.91922884,
"epoch": 2.9745454545454546,
"grad_norm": 3.1103789917359834,
"learning_rate": 3.82751674774648e-06,
"loss": 0.40710459,
"memory(GiB)": 33.01,
"step": 410,
"train_speed(iter/s)": 0.160539
},
{
"epoch": 3.0,
"eval_acc": 0.9214256784123127,
"eval_loss": 0.21592645347118378,
"eval_runtime": 12.9338,
"eval_samples_per_second": 8.969,
"eval_steps_per_second": 1.16,
"step": 414
}
],
"logging_steps": 5,
"max_steps": 685,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 492366226620416.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}