|
{ |
|
"best_metric": 0.21592645, |
|
"best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v6-20250103-052220/checkpoint-414", |
|
"epoch": 3.0, |
|
"eval_steps": 200, |
|
"global_step": 414, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.95261294, |
|
"epoch": 0.007272727272727273, |
|
"grad_norm": 1.9728064130980805, |
|
"learning_rate": 0.0, |
|
"loss": 0.22823659, |
|
"memory(GiB)": 31.98, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.054965 |
|
}, |
|
{ |
|
"acc": 0.93504357, |
|
"epoch": 0.03636363636363636, |
|
"grad_norm": 2.237331671502764, |
|
"learning_rate": 4.526808302869428e-06, |
|
"loss": 0.35742611, |
|
"memory(GiB)": 33.01, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.120168 |
|
}, |
|
{ |
|
"acc": 0.91343489, |
|
"epoch": 0.07272727272727272, |
|
"grad_norm": 3.274625633110072, |
|
"learning_rate": 6.476398521807292e-06, |
|
"loss": 0.45717983, |
|
"memory(GiB)": 33.01, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.14108 |
|
}, |
|
{ |
|
"acc": 0.93629456, |
|
"epoch": 0.10909090909090909, |
|
"grad_norm": 3.424189078760059, |
|
"learning_rate": 7.6168356916586906e-06, |
|
"loss": 0.34272318, |
|
"memory(GiB)": 33.01, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.149645 |
|
}, |
|
{ |
|
"acc": 0.92510157, |
|
"epoch": 0.14545454545454545, |
|
"grad_norm": 2.2347104663373494, |
|
"learning_rate": 8.425988740745155e-06, |
|
"loss": 0.38463933, |
|
"memory(GiB)": 33.01, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.154322 |
|
}, |
|
{ |
|
"acc": 0.92444115, |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 2.999692166672811, |
|
"learning_rate": 9.053616605738856e-06, |
|
"loss": 0.38348885, |
|
"memory(GiB)": 33.01, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.15729 |
|
}, |
|
{ |
|
"acc": 0.89717693, |
|
"epoch": 0.21818181818181817, |
|
"grad_norm": 3.151051933103401, |
|
"learning_rate": 9.566425910596553e-06, |
|
"loss": 0.50090494, |
|
"memory(GiB)": 33.01, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.159318 |
|
}, |
|
{ |
|
"acc": 0.8918396, |
|
"epoch": 0.2545454545454545, |
|
"grad_norm": 3.929292818540404, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.53159599, |
|
"memory(GiB)": 33.01, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.160758 |
|
}, |
|
{ |
|
"acc": 0.91250048, |
|
"epoch": 0.2909090909090909, |
|
"grad_norm": 2.9644423662409465, |
|
"learning_rate": 9.998540216393926e-06, |
|
"loss": 0.44619522, |
|
"memory(GiB)": 33.01, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.161864 |
|
}, |
|
{ |
|
"acc": 0.91625786, |
|
"epoch": 0.32727272727272727, |
|
"grad_norm": 2.8793186353393256, |
|
"learning_rate": 9.994161718048217e-06, |
|
"loss": 0.46977191, |
|
"memory(GiB)": 33.01, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.162757 |
|
}, |
|
{ |
|
"acc": 0.91005774, |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 2.649498967933029, |
|
"learning_rate": 9.986867061882612e-06, |
|
"loss": 0.4955514, |
|
"memory(GiB)": 33.01, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.163456 |
|
}, |
|
{ |
|
"acc": 0.90930538, |
|
"epoch": 0.4, |
|
"grad_norm": 3.239689305737002, |
|
"learning_rate": 9.976660507770886e-06, |
|
"loss": 0.48896346, |
|
"memory(GiB)": 33.01, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.164017 |
|
}, |
|
{ |
|
"acc": 0.89578419, |
|
"epoch": 0.43636363636363634, |
|
"grad_norm": 4.172977184581541, |
|
"learning_rate": 9.963548016053221e-06, |
|
"loss": 0.53288736, |
|
"memory(GiB)": 33.01, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.164492 |
|
}, |
|
{ |
|
"acc": 0.90814514, |
|
"epoch": 0.4727272727272727, |
|
"grad_norm": 2.4035166321477672, |
|
"learning_rate": 9.94753724405553e-06, |
|
"loss": 0.43980942, |
|
"memory(GiB)": 33.01, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.164893 |
|
}, |
|
{ |
|
"acc": 0.9009388, |
|
"epoch": 0.509090909090909, |
|
"grad_norm": 2.792817778420491, |
|
"learning_rate": 9.92863754161779e-06, |
|
"loss": 0.52926388, |
|
"memory(GiB)": 33.01, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.165233 |
|
}, |
|
{ |
|
"acc": 0.90013046, |
|
"epoch": 0.5454545454545454, |
|
"grad_norm": 2.6334892567714436, |
|
"learning_rate": 9.906859945633999e-06, |
|
"loss": 0.49731359, |
|
"memory(GiB)": 33.01, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.165533 |
|
}, |
|
{ |
|
"acc": 0.9045433, |
|
"epoch": 0.5818181818181818, |
|
"grad_norm": 2.332185471916977, |
|
"learning_rate": 9.882217173606948e-06, |
|
"loss": 0.51631742, |
|
"memory(GiB)": 33.01, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.165793 |
|
}, |
|
{ |
|
"acc": 0.92099228, |
|
"epoch": 0.6181818181818182, |
|
"grad_norm": 2.3620310828105935, |
|
"learning_rate": 9.854723616221548e-06, |
|
"loss": 0.41591549, |
|
"memory(GiB)": 33.01, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.166024 |
|
}, |
|
{ |
|
"acc": 0.91060734, |
|
"epoch": 0.6545454545454545, |
|
"grad_norm": 2.2844767249274214, |
|
"learning_rate": 9.824395328941086e-06, |
|
"loss": 0.44734068, |
|
"memory(GiB)": 33.01, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.16623 |
|
}, |
|
{ |
|
"acc": 0.91947842, |
|
"epoch": 0.6909090909090909, |
|
"grad_norm": 2.654847718943916, |
|
"learning_rate": 9.791250022631277e-06, |
|
"loss": 0.4483614, |
|
"memory(GiB)": 33.01, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.166415 |
|
}, |
|
{ |
|
"acc": 0.90607023, |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 2.846204394974626, |
|
"learning_rate": 9.755307053217622e-06, |
|
"loss": 0.47331667, |
|
"memory(GiB)": 33.01, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.16658 |
|
}, |
|
{ |
|
"acc": 0.91776829, |
|
"epoch": 0.7636363636363637, |
|
"grad_norm": 4.193055969054884, |
|
"learning_rate": 9.716587410382087e-06, |
|
"loss": 0.45284424, |
|
"memory(GiB)": 33.01, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.166736 |
|
}, |
|
{ |
|
"acc": 0.90584297, |
|
"epoch": 0.8, |
|
"grad_norm": 2.3839939588937082, |
|
"learning_rate": 9.675113705305733e-06, |
|
"loss": 0.49337497, |
|
"memory(GiB)": 33.01, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.166886 |
|
}, |
|
{ |
|
"acc": 0.90228806, |
|
"epoch": 0.8363636363636363, |
|
"grad_norm": 2.6045227101709214, |
|
"learning_rate": 9.630910157464404e-06, |
|
"loss": 0.48910527, |
|
"memory(GiB)": 33.01, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.16701 |
|
}, |
|
{ |
|
"acc": 0.90632572, |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 2.1215112041093906, |
|
"learning_rate": 9.584002580485256e-06, |
|
"loss": 0.4827064, |
|
"memory(GiB)": 33.01, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.16713 |
|
}, |
|
{ |
|
"acc": 0.91771622, |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 2.8141571001065424, |
|
"learning_rate": 9.534418367072303e-06, |
|
"loss": 0.40849586, |
|
"memory(GiB)": 33.01, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.167246 |
|
}, |
|
{ |
|
"acc": 0.89680672, |
|
"epoch": 0.9454545454545454, |
|
"grad_norm": 2.048597621204809, |
|
"learning_rate": 9.482186473009871e-06, |
|
"loss": 0.52411914, |
|
"memory(GiB)": 33.01, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.167353 |
|
}, |
|
{ |
|
"acc": 0.90298738, |
|
"epoch": 0.9818181818181818, |
|
"grad_norm": 2.887900153774404, |
|
"learning_rate": 9.427337400253224e-06, |
|
"loss": 0.5198132, |
|
"memory(GiB)": 33.01, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.167445 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_acc": 0.9165654110976104, |
|
"eval_loss": 0.2242395281791687, |
|
"eval_runtime": 12.914, |
|
"eval_samples_per_second": 8.983, |
|
"eval_steps_per_second": 1.162, |
|
"step": 138 |
|
}, |
|
{ |
|
"acc": 0.8111371, |
|
"epoch": 1.0145454545454546, |
|
"grad_norm": 2.8557900772261893, |
|
"learning_rate": 9.381595246879296e-06, |
|
"loss": 0.41494226, |
|
"memory(GiB)": 33.01, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.155909 |
|
}, |
|
{ |
|
"acc": 0.92820644, |
|
"epoch": 1.050909090909091, |
|
"grad_norm": 3.457089179525002, |
|
"learning_rate": 9.322116984002575e-06, |
|
"loss": 0.36117687, |
|
"memory(GiB)": 33.01, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.15618 |
|
}, |
|
{ |
|
"acc": 0.91620007, |
|
"epoch": 1.0872727272727274, |
|
"grad_norm": 2.202599146322218, |
|
"learning_rate": 9.260115018505599e-06, |
|
"loss": 0.44102664, |
|
"memory(GiB)": 33.01, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.156606 |
|
}, |
|
{ |
|
"acc": 0.91883535, |
|
"epoch": 1.1236363636363635, |
|
"grad_norm": 2.3114291842279697, |
|
"learning_rate": 9.195625557790217e-06, |
|
"loss": 0.41582127, |
|
"memory(GiB)": 33.01, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.156919 |
|
}, |
|
{ |
|
"acc": 0.93434258, |
|
"epoch": 1.16, |
|
"grad_norm": 5.406474002236419, |
|
"learning_rate": 9.128686261885441e-06, |
|
"loss": 0.35148592, |
|
"memory(GiB)": 33.01, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.157294 |
|
}, |
|
{ |
|
"acc": 0.91467266, |
|
"epoch": 1.1963636363636363, |
|
"grad_norm": 3.861068773297179, |
|
"learning_rate": 9.059336221455045e-06, |
|
"loss": 0.42474766, |
|
"memory(GiB)": 33.01, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.157643 |
|
}, |
|
{ |
|
"acc": 0.91754112, |
|
"epoch": 1.2327272727272727, |
|
"grad_norm": 3.4508767564011182, |
|
"learning_rate": 8.987615934969708e-06, |
|
"loss": 0.44358654, |
|
"memory(GiB)": 33.01, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.157971 |
|
}, |
|
{ |
|
"acc": 0.89289169, |
|
"epoch": 1.269090909090909, |
|
"grad_norm": 2.9350868391082434, |
|
"learning_rate": 8.913567285057077e-06, |
|
"loss": 0.51100779, |
|
"memory(GiB)": 33.01, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.158289 |
|
}, |
|
{ |
|
"acc": 0.91759186, |
|
"epoch": 1.3054545454545454, |
|
"grad_norm": 2.3549681642790072, |
|
"learning_rate": 8.837233514043455e-06, |
|
"loss": 0.40699306, |
|
"memory(GiB)": 33.01, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.158589 |
|
}, |
|
{ |
|
"acc": 0.9234314, |
|
"epoch": 1.3418181818181818, |
|
"grad_norm": 2.097178456402854, |
|
"learning_rate": 8.758659198701528e-06, |
|
"loss": 0.38527017, |
|
"memory(GiB)": 33.01, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.158868 |
|
}, |
|
{ |
|
"acc": 0.90927277, |
|
"epoch": 1.3781818181818182, |
|
"grad_norm": 2.0240919955649277, |
|
"learning_rate": 8.677890224218774e-06, |
|
"loss": 0.43820124, |
|
"memory(GiB)": 33.01, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.159135 |
|
}, |
|
{ |
|
"acc": 0.92771797, |
|
"epoch": 1.4145454545454546, |
|
"grad_norm": 1.8263954768378603, |
|
"learning_rate": 8.594973757401804e-06, |
|
"loss": 0.38704326, |
|
"memory(GiB)": 33.01, |
|
"step": 195, |
|
"train_speed(iter/s)": 0.159384 |
|
}, |
|
{ |
|
"acc": 0.92044001, |
|
"epoch": 1.450909090909091, |
|
"grad_norm": 2.1904738367354772, |
|
"learning_rate": 8.50995821913228e-06, |
|
"loss": 0.40301366, |
|
"memory(GiB)": 33.01, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.159623 |
|
}, |
|
{ |
|
"acc": 0.91810284, |
|
"epoch": 1.4872727272727273, |
|
"grad_norm": 2.322191659362731, |
|
"learning_rate": 8.42289325609048e-06, |
|
"loss": 0.42446461, |
|
"memory(GiB)": 33.01, |
|
"step": 205, |
|
"train_speed(iter/s)": 0.159854 |
|
}, |
|
{ |
|
"acc": 0.91816349, |
|
"epoch": 1.5236363636363637, |
|
"grad_norm": 3.6766273070353996, |
|
"learning_rate": 8.333829711763038e-06, |
|
"loss": 0.40156956, |
|
"memory(GiB)": 33.01, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.160076 |
|
}, |
|
{ |
|
"acc": 0.9263833, |
|
"epoch": 1.56, |
|
"grad_norm": 4.222791406049376, |
|
"learning_rate": 8.242819596751778e-06, |
|
"loss": 0.40653038, |
|
"memory(GiB)": 33.01, |
|
"step": 215, |
|
"train_speed(iter/s)": 0.160284 |
|
}, |
|
{ |
|
"acc": 0.90884829, |
|
"epoch": 1.5963636363636362, |
|
"grad_norm": 2.774384141884033, |
|
"learning_rate": 8.149916058400986e-06, |
|
"loss": 0.4541821, |
|
"memory(GiB)": 33.01, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.160482 |
|
}, |
|
{ |
|
"acc": 0.92306767, |
|
"epoch": 1.6327272727272728, |
|
"grad_norm": 1.8808774186906907, |
|
"learning_rate": 8.055173349760858e-06, |
|
"loss": 0.38071015, |
|
"memory(GiB)": 33.01, |
|
"step": 225, |
|
"train_speed(iter/s)": 0.160675 |
|
}, |
|
{ |
|
"acc": 0.92462444, |
|
"epoch": 1.669090909090909, |
|
"grad_norm": 2.6660330848450147, |
|
"learning_rate": 7.958646797905251e-06, |
|
"loss": 0.40280228, |
|
"memory(GiB)": 33.01, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.160867 |
|
}, |
|
{ |
|
"acc": 0.90507927, |
|
"epoch": 1.7054545454545456, |
|
"grad_norm": 2.396279628543684, |
|
"learning_rate": 7.860392771622222e-06, |
|
"loss": 0.49732876, |
|
"memory(GiB)": 33.01, |
|
"step": 235, |
|
"train_speed(iter/s)": 0.161052 |
|
}, |
|
{ |
|
"acc": 0.90813217, |
|
"epoch": 1.7418181818181817, |
|
"grad_norm": 1.959098632103441, |
|
"learning_rate": 7.760468648496251e-06, |
|
"loss": 0.45160952, |
|
"memory(GiB)": 33.01, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.161223 |
|
}, |
|
{ |
|
"acc": 0.90353422, |
|
"epoch": 1.7781818181818183, |
|
"grad_norm": 3.350380496148853, |
|
"learning_rate": 7.658932781401341e-06, |
|
"loss": 0.49037123, |
|
"memory(GiB)": 33.01, |
|
"step": 245, |
|
"train_speed(iter/s)": 0.161388 |
|
}, |
|
{ |
|
"acc": 0.91233768, |
|
"epoch": 1.8145454545454545, |
|
"grad_norm": 2.375193909280913, |
|
"learning_rate": 7.5558444644245855e-06, |
|
"loss": 0.45447264, |
|
"memory(GiB)": 33.01, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.161547 |
|
}, |
|
{ |
|
"acc": 0.90856533, |
|
"epoch": 1.850909090909091, |
|
"grad_norm": 2.7061925296961937, |
|
"learning_rate": 7.451263898240091e-06, |
|
"loss": 0.47929668, |
|
"memory(GiB)": 33.01, |
|
"step": 255, |
|
"train_speed(iter/s)": 0.161704 |
|
}, |
|
{ |
|
"acc": 0.92107229, |
|
"epoch": 1.8872727272727272, |
|
"grad_norm": 3.242205419295514, |
|
"learning_rate": 7.345252154953482e-06, |
|
"loss": 0.39586713, |
|
"memory(GiB)": 33.01, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.161853 |
|
}, |
|
{ |
|
"acc": 0.91105738, |
|
"epoch": 1.9236363636363636, |
|
"grad_norm": 3.5795963065620744, |
|
"learning_rate": 7.2378711424375056e-06, |
|
"loss": 0.46428795, |
|
"memory(GiB)": 33.01, |
|
"step": 265, |
|
"train_speed(iter/s)": 0.161996 |
|
}, |
|
{ |
|
"acc": 0.92524033, |
|
"epoch": 1.96, |
|
"grad_norm": 3.652004803975923, |
|
"learning_rate": 7.129183568179582e-06, |
|
"loss": 0.34946482, |
|
"memory(GiB)": 33.01, |
|
"step": 270, |
|
"train_speed(iter/s)": 0.162127 |
|
}, |
|
{ |
|
"acc": 0.91721325, |
|
"epoch": 1.9963636363636363, |
|
"grad_norm": 2.386708723548124, |
|
"learning_rate": 7.019252902662391e-06, |
|
"loss": 0.45530472, |
|
"memory(GiB)": 33.01, |
|
"step": 275, |
|
"train_speed(iter/s)": 0.162264 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_acc": 0.9177804779262859, |
|
"eval_loss": 0.2162669152021408, |
|
"eval_runtime": 12.9627, |
|
"eval_samples_per_second": 8.949, |
|
"eval_steps_per_second": 1.157, |
|
"step": 276 |
|
}, |
|
{ |
|
"acc": 0.83070297, |
|
"epoch": 2.0290909090909093, |
|
"grad_norm": 2.535544826305509, |
|
"learning_rate": 6.908143342298905e-06, |
|
"loss": 0.36057291, |
|
"memory(GiB)": 33.01, |
|
"step": 280, |
|
"train_speed(iter/s)": 0.156683 |
|
}, |
|
{ |
|
"acc": 0.93970337, |
|
"epoch": 2.0654545454545454, |
|
"grad_norm": 2.276921539430186, |
|
"learning_rate": 6.795919771943491e-06, |
|
"loss": 0.32403946, |
|
"memory(GiB)": 33.01, |
|
"step": 285, |
|
"train_speed(iter/s)": 0.156901 |
|
}, |
|
{ |
|
"acc": 0.92168732, |
|
"epoch": 2.101818181818182, |
|
"grad_norm": 4.127638603634917, |
|
"learning_rate": 6.682647727000975e-06, |
|
"loss": 0.41061325, |
|
"memory(GiB)": 33.01, |
|
"step": 290, |
|
"train_speed(iter/s)": 0.157112 |
|
}, |
|
{ |
|
"acc": 0.92185087, |
|
"epoch": 2.138181818181818, |
|
"grad_norm": 2.613159698814904, |
|
"learning_rate": 6.5683933551558196e-06, |
|
"loss": 0.41276412, |
|
"memory(GiB)": 33.01, |
|
"step": 295, |
|
"train_speed(iter/s)": 0.157311 |
|
}, |
|
{ |
|
"acc": 0.93119574, |
|
"epoch": 2.174545454545455, |
|
"grad_norm": 2.4146244471425997, |
|
"learning_rate": 6.4532233777437355e-06, |
|
"loss": 0.34114873, |
|
"memory(GiB)": 33.01, |
|
"step": 300, |
|
"train_speed(iter/s)": 0.157504 |
|
}, |
|
{ |
|
"acc": 0.92488451, |
|
"epoch": 2.210909090909091, |
|
"grad_norm": 2.907208779112958, |
|
"learning_rate": 6.337205050788301e-06, |
|
"loss": 0.38522263, |
|
"memory(GiB)": 33.01, |
|
"step": 305, |
|
"train_speed(iter/s)": 0.15769 |
|
}, |
|
{ |
|
"acc": 0.92167654, |
|
"epoch": 2.247272727272727, |
|
"grad_norm": 2.9896138007592024, |
|
"learning_rate": 6.220406125725334e-06, |
|
"loss": 0.37358305, |
|
"memory(GiB)": 33.01, |
|
"step": 310, |
|
"train_speed(iter/s)": 0.15787 |
|
}, |
|
{ |
|
"acc": 0.9312851, |
|
"epoch": 2.2836363636363637, |
|
"grad_norm": 4.445824844631244, |
|
"learning_rate": 6.102894809837971e-06, |
|
"loss": 0.36833365, |
|
"memory(GiB)": 33.01, |
|
"step": 315, |
|
"train_speed(iter/s)": 0.158047 |
|
}, |
|
{ |
|
"acc": 0.93003368, |
|
"epoch": 2.32, |
|
"grad_norm": 2.3181584670775464, |
|
"learning_rate": 5.984739726425528e-06, |
|
"loss": 0.36130757, |
|
"memory(GiB)": 33.01, |
|
"step": 320, |
|
"train_speed(iter/s)": 0.15822 |
|
}, |
|
{ |
|
"acc": 0.91338739, |
|
"epoch": 2.3563636363636364, |
|
"grad_norm": 2.3002867827492235, |
|
"learning_rate": 5.866009874729421e-06, |
|
"loss": 0.44016724, |
|
"memory(GiB)": 33.01, |
|
"step": 325, |
|
"train_speed(iter/s)": 0.158388 |
|
}, |
|
{ |
|
"acc": 0.93581047, |
|
"epoch": 2.3927272727272726, |
|
"grad_norm": 2.212658837030889, |
|
"learning_rate": 5.746774589639565e-06, |
|
"loss": 0.3611378, |
|
"memory(GiB)": 33.01, |
|
"step": 330, |
|
"train_speed(iter/s)": 0.158548 |
|
}, |
|
{ |
|
"acc": 0.91852398, |
|
"epoch": 2.429090909090909, |
|
"grad_norm": 3.5098558925283343, |
|
"learning_rate": 5.6271035012047395e-06, |
|
"loss": 0.4051528, |
|
"memory(GiB)": 33.01, |
|
"step": 335, |
|
"train_speed(iter/s)": 0.158706 |
|
}, |
|
{ |
|
"acc": 0.92445469, |
|
"epoch": 2.4654545454545453, |
|
"grad_norm": 2.3254480753766833, |
|
"learning_rate": 5.507066493970609e-06, |
|
"loss": 0.38657694, |
|
"memory(GiB)": 33.01, |
|
"step": 340, |
|
"train_speed(iter/s)": 0.158859 |
|
}, |
|
{ |
|
"acc": 0.94319763, |
|
"epoch": 2.501818181818182, |
|
"grad_norm": 2.546749274650209, |
|
"learning_rate": 5.386733666169105e-06, |
|
"loss": 0.3047235, |
|
"memory(GiB)": 33.01, |
|
"step": 345, |
|
"train_speed(iter/s)": 0.159007 |
|
}, |
|
{ |
|
"acc": 0.92226295, |
|
"epoch": 2.538181818181818, |
|
"grad_norm": 2.061710328743839, |
|
"learning_rate": 5.266175288783046e-06, |
|
"loss": 0.38313189, |
|
"memory(GiB)": 33.01, |
|
"step": 350, |
|
"train_speed(iter/s)": 0.159149 |
|
}, |
|
{ |
|
"acc": 0.93342876, |
|
"epoch": 2.5745454545454547, |
|
"grad_norm": 3.3978986222055916, |
|
"learning_rate": 5.1454617645098595e-06, |
|
"loss": 0.32807801, |
|
"memory(GiB)": 33.01, |
|
"step": 355, |
|
"train_speed(iter/s)": 0.159288 |
|
}, |
|
{ |
|
"acc": 0.92254868, |
|
"epoch": 2.610909090909091, |
|
"grad_norm": 2.36401523000578, |
|
"learning_rate": 5.024663586648378e-06, |
|
"loss": 0.37164879, |
|
"memory(GiB)": 33.01, |
|
"step": 360, |
|
"train_speed(iter/s)": 0.159425 |
|
}, |
|
{ |
|
"acc": 0.93187628, |
|
"epoch": 2.6472727272727274, |
|
"grad_norm": 2.0346344864902726, |
|
"learning_rate": 4.903851297932749e-06, |
|
"loss": 0.35205598, |
|
"memory(GiB)": 33.01, |
|
"step": 365, |
|
"train_speed(iter/s)": 0.159489 |
|
}, |
|
{ |
|
"acc": 0.92569065, |
|
"epoch": 2.6836363636363636, |
|
"grad_norm": 2.7461167183892883, |
|
"learning_rate": 4.783095449337462e-06, |
|
"loss": 0.35683942, |
|
"memory(GiB)": 33.01, |
|
"step": 370, |
|
"train_speed(iter/s)": 0.159622 |
|
}, |
|
{ |
|
"acc": 0.92851734, |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 2.7763819290375316, |
|
"learning_rate": 4.6624665588775675e-06, |
|
"loss": 0.35814347, |
|
"memory(GiB)": 33.01, |
|
"step": 375, |
|
"train_speed(iter/s)": 0.159722 |
|
}, |
|
{ |
|
"acc": 0.94960651, |
|
"epoch": 2.7563636363636363, |
|
"grad_norm": 2.4971767216968956, |
|
"learning_rate": 4.542035070428136e-06, |
|
"loss": 0.27659984, |
|
"memory(GiB)": 33.01, |
|
"step": 380, |
|
"train_speed(iter/s)": 0.159847 |
|
}, |
|
{ |
|
"acc": 0.92807074, |
|
"epoch": 2.792727272727273, |
|
"grad_norm": 2.683132297237531, |
|
"learning_rate": 4.4218713125870236e-06, |
|
"loss": 0.37527924, |
|
"memory(GiB)": 33.01, |
|
"step": 385, |
|
"train_speed(iter/s)": 0.159969 |
|
}, |
|
{ |
|
"acc": 0.93306198, |
|
"epoch": 2.829090909090909, |
|
"grad_norm": 2.295750889124929, |
|
"learning_rate": 4.302045457604953e-06, |
|
"loss": 0.33568211, |
|
"memory(GiB)": 33.01, |
|
"step": 390, |
|
"train_speed(iter/s)": 0.160089 |
|
}, |
|
{ |
|
"acc": 0.92898979, |
|
"epoch": 2.8654545454545453, |
|
"grad_norm": 2.07143338080191, |
|
"learning_rate": 4.182627480406894e-06, |
|
"loss": 0.34494858, |
|
"memory(GiB)": 33.01, |
|
"step": 395, |
|
"train_speed(iter/s)": 0.160203 |
|
}, |
|
{ |
|
"acc": 0.92927322, |
|
"epoch": 2.901818181818182, |
|
"grad_norm": 3.061148841919245, |
|
"learning_rate": 4.063687117728671e-06, |
|
"loss": 0.37803557, |
|
"memory(GiB)": 33.01, |
|
"step": 400, |
|
"train_speed(iter/s)": 0.160316 |
|
}, |
|
{ |
|
"acc": 0.93406506, |
|
"epoch": 2.9381818181818184, |
|
"grad_norm": 2.3908961716983748, |
|
"learning_rate": 3.94529382739268e-06, |
|
"loss": 0.32702701, |
|
"memory(GiB)": 33.01, |
|
"step": 405, |
|
"train_speed(iter/s)": 0.160429 |
|
}, |
|
{ |
|
"acc": 0.91922884, |
|
"epoch": 2.9745454545454546, |
|
"grad_norm": 3.1103789917359834, |
|
"learning_rate": 3.82751674774648e-06, |
|
"loss": 0.40710459, |
|
"memory(GiB)": 33.01, |
|
"step": 410, |
|
"train_speed(iter/s)": 0.160539 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_acc": 0.9214256784123127, |
|
"eval_loss": 0.21592645347118378, |
|
"eval_runtime": 12.9338, |
|
"eval_samples_per_second": 8.969, |
|
"eval_steps_per_second": 1.16, |
|
"step": 414 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 685, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 492366226620416.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|