|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.99889339727038, |
|
"eval_steps": 50, |
|
"global_step": 677, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.85936797, |
|
"epoch": 0.0014754703061600886, |
|
"grad_norm": 7.874454151515785, |
|
"learning_rate": 0.0, |
|
"loss": 0.68658942, |
|
"memory(GiB)": 24.89, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.03037 |
|
}, |
|
{ |
|
"acc": 0.84321463, |
|
"epoch": 0.0073773515308004425, |
|
"grad_norm": 8.79654818500605, |
|
"learning_rate": 7.628557760232497e-07, |
|
"loss": 0.79017758, |
|
"memory(GiB)": 31.87, |
|
"step": 5, |
|
"train_speed(iter/s)": 0.092709 |
|
}, |
|
{ |
|
"acc": 0.85256624, |
|
"epoch": 0.014754703061600885, |
|
"grad_norm": 8.005772072681205, |
|
"learning_rate": 1.0913998759473501e-06, |
|
"loss": 0.70760584, |
|
"memory(GiB)": 33.75, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.120868 |
|
}, |
|
{ |
|
"acc": 0.85825052, |
|
"epoch": 0.022132054592401328, |
|
"grad_norm": 4.861872738410458, |
|
"learning_rate": 1.2835858542361333e-06, |
|
"loss": 0.64002485, |
|
"memory(GiB)": 33.01, |
|
"step": 15, |
|
"train_speed(iter/s)": 0.137764 |
|
}, |
|
{ |
|
"acc": 0.8677763, |
|
"epoch": 0.02950940612320177, |
|
"grad_norm": 2.624090927434735, |
|
"learning_rate": 1.4199439758714505e-06, |
|
"loss": 0.5428031, |
|
"memory(GiB)": 34.84, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.148523 |
|
}, |
|
{ |
|
"acc": 0.88262272, |
|
"epoch": 0.03688675765400221, |
|
"grad_norm": 2.2979293864903276, |
|
"learning_rate": 1.5257115520464994e-06, |
|
"loss": 0.45293074, |
|
"memory(GiB)": 31.42, |
|
"step": 25, |
|
"train_speed(iter/s)": 0.152816 |
|
}, |
|
{ |
|
"acc": 0.88684368, |
|
"epoch": 0.044264109184802655, |
|
"grad_norm": 2.321279166108657, |
|
"learning_rate": 1.6121299541602339e-06, |
|
"loss": 0.44487882, |
|
"memory(GiB)": 34.17, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.158226 |
|
}, |
|
{ |
|
"acc": 0.88785019, |
|
"epoch": 0.0516414607156031, |
|
"grad_norm": 1.6462078924259171, |
|
"learning_rate": 1.6851956720581583e-06, |
|
"loss": 0.42431307, |
|
"memory(GiB)": 33.89, |
|
"step": 35, |
|
"train_speed(iter/s)": 0.160915 |
|
}, |
|
{ |
|
"acc": 0.88771706, |
|
"epoch": 0.05901881224640354, |
|
"grad_norm": 2.0535907435541323, |
|
"learning_rate": 1.7484880757955508e-06, |
|
"loss": 0.41692309, |
|
"memory(GiB)": 33.45, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.162212 |
|
}, |
|
{ |
|
"acc": 0.89934006, |
|
"epoch": 0.06639616377720399, |
|
"grad_norm": 1.880024272875225, |
|
"learning_rate": 1.8043159324490168e-06, |
|
"loss": 0.37824535, |
|
"memory(GiB)": 32.49, |
|
"step": 45, |
|
"train_speed(iter/s)": 0.164895 |
|
}, |
|
{ |
|
"acc": 0.89317064, |
|
"epoch": 0.07377351530800443, |
|
"grad_norm": 2.4862794709135483, |
|
"learning_rate": 1.8542556519706e-06, |
|
"loss": 0.39434323, |
|
"memory(GiB)": 31.37, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.166039 |
|
}, |
|
{ |
|
"epoch": 0.07377351530800443, |
|
"eval_acc": 0.8897788969852836, |
|
"eval_loss": 0.3586576581001282, |
|
"eval_runtime": 9.1458, |
|
"eval_samples_per_second": 23.836, |
|
"eval_steps_per_second": 3.062, |
|
"step": 50 |
|
}, |
|
{ |
|
"acc": 0.90738754, |
|
"epoch": 0.08115086683880487, |
|
"grad_norm": 1.818011862869067, |
|
"learning_rate": 1.8994316234174147e-06, |
|
"loss": 0.34018734, |
|
"memory(GiB)": 43.99, |
|
"step": 55, |
|
"train_speed(iter/s)": 0.163069 |
|
}, |
|
{ |
|
"acc": 0.89877386, |
|
"epoch": 0.08852821836960531, |
|
"grad_norm": 2.769061395622785, |
|
"learning_rate": 1.940674054084334e-06, |
|
"loss": 0.3834722, |
|
"memory(GiB)": 33.18, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.163587 |
|
}, |
|
{ |
|
"acc": 0.89560518, |
|
"epoch": 0.09590556990040576, |
|
"grad_norm": 3.0254291124967776, |
|
"learning_rate": 1.9786134125433064e-06, |
|
"loss": 0.40774279, |
|
"memory(GiB)": 36.96, |
|
"step": 65, |
|
"train_speed(iter/s)": 0.163438 |
|
}, |
|
{ |
|
"acc": 0.90745316, |
|
"epoch": 0.1032829214312062, |
|
"grad_norm": 1.9702664127406297, |
|
"learning_rate": 1.998444790046656e-06, |
|
"loss": 0.34646974, |
|
"memory(GiB)": 33.91, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.165839 |
|
}, |
|
{ |
|
"acc": 0.90453644, |
|
"epoch": 0.11066027296200664, |
|
"grad_norm": 1.956498769069037, |
|
"learning_rate": 1.990668740279938e-06, |
|
"loss": 0.34771657, |
|
"memory(GiB)": 32.4, |
|
"step": 75, |
|
"train_speed(iter/s)": 0.166283 |
|
}, |
|
{ |
|
"acc": 0.90620461, |
|
"epoch": 0.11803762449280708, |
|
"grad_norm": 1.7929520466502804, |
|
"learning_rate": 1.9828926905132194e-06, |
|
"loss": 0.34979777, |
|
"memory(GiB)": 32.69, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.166045 |
|
}, |
|
{ |
|
"acc": 0.90826426, |
|
"epoch": 0.12541497602360752, |
|
"grad_norm": 2.255532399806791, |
|
"learning_rate": 1.975116640746501e-06, |
|
"loss": 0.34021211, |
|
"memory(GiB)": 32.39, |
|
"step": 85, |
|
"train_speed(iter/s)": 0.16736 |
|
}, |
|
{ |
|
"acc": 0.90400352, |
|
"epoch": 0.13279232755440798, |
|
"grad_norm": 1.606426887028717, |
|
"learning_rate": 1.9673405909797823e-06, |
|
"loss": 0.3593976, |
|
"memory(GiB)": 33.28, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.166086 |
|
}, |
|
{ |
|
"acc": 0.90273075, |
|
"epoch": 0.14016967908520842, |
|
"grad_norm": 1.7550090784719037, |
|
"learning_rate": 1.959564541213064e-06, |
|
"loss": 0.34527693, |
|
"memory(GiB)": 32.74, |
|
"step": 95, |
|
"train_speed(iter/s)": 0.167937 |
|
}, |
|
{ |
|
"acc": 0.90631161, |
|
"epoch": 0.14754703061600885, |
|
"grad_norm": 2.151177976553762, |
|
"learning_rate": 1.9517884914463452e-06, |
|
"loss": 0.34601164, |
|
"memory(GiB)": 34.44, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.167745 |
|
}, |
|
{ |
|
"epoch": 0.14754703061600885, |
|
"eval_acc": 0.8985658665523646, |
|
"eval_loss": 0.3217943012714386, |
|
"eval_runtime": 9.0118, |
|
"eval_samples_per_second": 24.19, |
|
"eval_steps_per_second": 3.107, |
|
"step": 100 |
|
}, |
|
{ |
|
"acc": 0.90445766, |
|
"epoch": 0.1549243821468093, |
|
"grad_norm": 2.0562867995030527, |
|
"learning_rate": 1.9440124416796267e-06, |
|
"loss": 0.34789481, |
|
"memory(GiB)": 42.9, |
|
"step": 105, |
|
"train_speed(iter/s)": 0.164588 |
|
}, |
|
{ |
|
"acc": 0.90358963, |
|
"epoch": 0.16230173367760975, |
|
"grad_norm": 1.8705476431194374, |
|
"learning_rate": 1.936236391912908e-06, |
|
"loss": 0.34220786, |
|
"memory(GiB)": 31.78, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.165873 |
|
}, |
|
{ |
|
"acc": 0.9085845, |
|
"epoch": 0.16967908520841019, |
|
"grad_norm": 1.8278699994168497, |
|
"learning_rate": 1.9284603421461896e-06, |
|
"loss": 0.3233917, |
|
"memory(GiB)": 31.86, |
|
"step": 115, |
|
"train_speed(iter/s)": 0.16598 |
|
}, |
|
{ |
|
"acc": 0.90997429, |
|
"epoch": 0.17705643673921062, |
|
"grad_norm": 1.945716912044592, |
|
"learning_rate": 1.920684292379471e-06, |
|
"loss": 0.34307232, |
|
"memory(GiB)": 35.12, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.166556 |
|
}, |
|
{ |
|
"acc": 0.91014824, |
|
"epoch": 0.18443378827001106, |
|
"grad_norm": 1.7135397704667659, |
|
"learning_rate": 1.912908242612753e-06, |
|
"loss": 0.32152495, |
|
"memory(GiB)": 35.65, |
|
"step": 125, |
|
"train_speed(iter/s)": 0.167431 |
|
}, |
|
{ |
|
"acc": 0.9074892, |
|
"epoch": 0.19181113980081152, |
|
"grad_norm": 1.7116721779311537, |
|
"learning_rate": 1.9051321928460342e-06, |
|
"loss": 0.32937753, |
|
"memory(GiB)": 33.19, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.167152 |
|
}, |
|
{ |
|
"acc": 0.90999937, |
|
"epoch": 0.19918849133161196, |
|
"grad_norm": 1.6389355962957932, |
|
"learning_rate": 1.8973561430793156e-06, |
|
"loss": 0.33004179, |
|
"memory(GiB)": 33.36, |
|
"step": 135, |
|
"train_speed(iter/s)": 0.168049 |
|
}, |
|
{ |
|
"acc": 0.9056819, |
|
"epoch": 0.2065658428624124, |
|
"grad_norm": 1.618401896535921, |
|
"learning_rate": 1.889580093312597e-06, |
|
"loss": 0.32887373, |
|
"memory(GiB)": 31.72, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.167987 |
|
}, |
|
{ |
|
"acc": 0.90799198, |
|
"epoch": 0.21394319439321283, |
|
"grad_norm": 2.0697336354422076, |
|
"learning_rate": 1.8818040435458787e-06, |
|
"loss": 0.33212447, |
|
"memory(GiB)": 32.61, |
|
"step": 145, |
|
"train_speed(iter/s)": 0.168358 |
|
}, |
|
{ |
|
"acc": 0.89975605, |
|
"epoch": 0.2213205459240133, |
|
"grad_norm": 1.645561918074026, |
|
"learning_rate": 1.8740279937791602e-06, |
|
"loss": 0.35846872, |
|
"memory(GiB)": 32.3, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.169041 |
|
}, |
|
{ |
|
"epoch": 0.2213205459240133, |
|
"eval_acc": 0.9009412058865552, |
|
"eval_loss": 0.31137242913246155, |
|
"eval_runtime": 8.9003, |
|
"eval_samples_per_second": 24.494, |
|
"eval_steps_per_second": 3.146, |
|
"step": 150 |
|
}, |
|
{ |
|
"acc": 0.90751858, |
|
"epoch": 0.22869789745481373, |
|
"grad_norm": 1.717914687308357, |
|
"learning_rate": 1.8662519440124416e-06, |
|
"loss": 0.33635845, |
|
"memory(GiB)": 43.6, |
|
"step": 155, |
|
"train_speed(iter/s)": 0.167082 |
|
}, |
|
{ |
|
"acc": 0.90450516, |
|
"epoch": 0.23607524898561416, |
|
"grad_norm": 1.6863266349964434, |
|
"learning_rate": 1.858475894245723e-06, |
|
"loss": 0.35405197, |
|
"memory(GiB)": 33.81, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.167855 |
|
}, |
|
{ |
|
"acc": 0.90395164, |
|
"epoch": 0.2434526005164146, |
|
"grad_norm": 2.1013428529714906, |
|
"learning_rate": 1.8506998444790045e-06, |
|
"loss": 0.34658258, |
|
"memory(GiB)": 32.9, |
|
"step": 165, |
|
"train_speed(iter/s)": 0.167867 |
|
}, |
|
{ |
|
"acc": 0.91127558, |
|
"epoch": 0.25082995204721503, |
|
"grad_norm": 1.6631238092162342, |
|
"learning_rate": 1.842923794712286e-06, |
|
"loss": 0.32777104, |
|
"memory(GiB)": 33.53, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.168028 |
|
}, |
|
{ |
|
"acc": 0.90831413, |
|
"epoch": 0.25820730357801547, |
|
"grad_norm": 2.0857884493375756, |
|
"learning_rate": 1.8351477449455676e-06, |
|
"loss": 0.32164063, |
|
"memory(GiB)": 32.03, |
|
"step": 175, |
|
"train_speed(iter/s)": 0.169138 |
|
}, |
|
{ |
|
"acc": 0.91539364, |
|
"epoch": 0.26558465510881596, |
|
"grad_norm": 2.0145344122511095, |
|
"learning_rate": 1.827371695178849e-06, |
|
"loss": 0.30975475, |
|
"memory(GiB)": 34.31, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.168973 |
|
}, |
|
{ |
|
"acc": 0.9064558, |
|
"epoch": 0.2729620066396164, |
|
"grad_norm": 1.6651879684580124, |
|
"learning_rate": 1.8195956454121305e-06, |
|
"loss": 0.3413609, |
|
"memory(GiB)": 32.63, |
|
"step": 185, |
|
"train_speed(iter/s)": 0.169312 |
|
}, |
|
{ |
|
"acc": 0.90828686, |
|
"epoch": 0.28033935817041683, |
|
"grad_norm": 2.3469960245148056, |
|
"learning_rate": 1.811819595645412e-06, |
|
"loss": 0.32660947, |
|
"memory(GiB)": 33.41, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.169856 |
|
}, |
|
{ |
|
"acc": 0.91549397, |
|
"epoch": 0.28771670970121727, |
|
"grad_norm": 2.1806025367886117, |
|
"learning_rate": 1.8040435458786937e-06, |
|
"loss": 0.30616875, |
|
"memory(GiB)": 36.24, |
|
"step": 195, |
|
"train_speed(iter/s)": 0.169761 |
|
}, |
|
{ |
|
"acc": 0.90924969, |
|
"epoch": 0.2950940612320177, |
|
"grad_norm": 1.5587292681869693, |
|
"learning_rate": 1.7962674961119751e-06, |
|
"loss": 0.32027857, |
|
"memory(GiB)": 32.62, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.170581 |
|
}, |
|
{ |
|
"epoch": 0.2950940612320177, |
|
"eval_acc": 0.901896699528504, |
|
"eval_loss": 0.3015853464603424, |
|
"eval_runtime": 9.0231, |
|
"eval_samples_per_second": 24.16, |
|
"eval_steps_per_second": 3.103, |
|
"step": 200 |
|
}, |
|
{ |
|
"acc": 0.91348085, |
|
"epoch": 0.30247141276281814, |
|
"grad_norm": 1.7818986098446097, |
|
"learning_rate": 1.7884914463452566e-06, |
|
"loss": 0.30208986, |
|
"memory(GiB)": 44.06, |
|
"step": 205, |
|
"train_speed(iter/s)": 0.169194 |
|
}, |
|
{ |
|
"acc": 0.90921364, |
|
"epoch": 0.3098487642936186, |
|
"grad_norm": 4.02077354284952, |
|
"learning_rate": 1.780715396578538e-06, |
|
"loss": 0.31497798, |
|
"memory(GiB)": 34.58, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.169003 |
|
}, |
|
{ |
|
"acc": 0.91234264, |
|
"epoch": 0.317226115824419, |
|
"grad_norm": 1.856976113207096, |
|
"learning_rate": 1.7729393468118195e-06, |
|
"loss": 0.30694566, |
|
"memory(GiB)": 33.8, |
|
"step": 215, |
|
"train_speed(iter/s)": 0.16984 |
|
}, |
|
{ |
|
"acc": 0.91051998, |
|
"epoch": 0.3246034673552195, |
|
"grad_norm": 1.7185168230569432, |
|
"learning_rate": 1.765163297045101e-06, |
|
"loss": 0.30961909, |
|
"memory(GiB)": 32.79, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.169666 |
|
}, |
|
{ |
|
"acc": 0.90716095, |
|
"epoch": 0.33198081888601993, |
|
"grad_norm": 1.340608010048739, |
|
"learning_rate": 1.7573872472783826e-06, |
|
"loss": 0.32777991, |
|
"memory(GiB)": 32.43, |
|
"step": 225, |
|
"train_speed(iter/s)": 0.169965 |
|
}, |
|
{ |
|
"acc": 0.91547451, |
|
"epoch": 0.33935817041682037, |
|
"grad_norm": 1.6059763623857688, |
|
"learning_rate": 1.749611197511664e-06, |
|
"loss": 0.30423913, |
|
"memory(GiB)": 34.95, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.169935 |
|
}, |
|
{ |
|
"acc": 0.917132, |
|
"epoch": 0.3467355219476208, |
|
"grad_norm": 2.0390121908637644, |
|
"learning_rate": 1.7418351477449455e-06, |
|
"loss": 0.30788417, |
|
"memory(GiB)": 34.18, |
|
"step": 235, |
|
"train_speed(iter/s)": 0.169583 |
|
}, |
|
{ |
|
"acc": 0.92253389, |
|
"epoch": 0.35411287347842124, |
|
"grad_norm": 1.7323441045370742, |
|
"learning_rate": 1.734059097978227e-06, |
|
"loss": 0.27823753, |
|
"memory(GiB)": 31.85, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.17024 |
|
}, |
|
{ |
|
"acc": 0.91325512, |
|
"epoch": 0.3614902250092217, |
|
"grad_norm": 1.6955182367729624, |
|
"learning_rate": 1.7262830482115086e-06, |
|
"loss": 0.31402481, |
|
"memory(GiB)": 32.14, |
|
"step": 245, |
|
"train_speed(iter/s)": 0.169973 |
|
}, |
|
{ |
|
"acc": 0.91568565, |
|
"epoch": 0.3688675765400221, |
|
"grad_norm": 1.5212817841417117, |
|
"learning_rate": 1.71850699844479e-06, |
|
"loss": 0.29354782, |
|
"memory(GiB)": 33.28, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.169891 |
|
}, |
|
{ |
|
"epoch": 0.3688675765400221, |
|
"eval_acc": 0.903888055436491, |
|
"eval_loss": 0.2949393689632416, |
|
"eval_runtime": 8.8569, |
|
"eval_samples_per_second": 24.614, |
|
"eval_steps_per_second": 3.161, |
|
"step": 250 |
|
}, |
|
{ |
|
"acc": 0.91542091, |
|
"epoch": 0.37624492807082255, |
|
"grad_norm": 1.872512089057089, |
|
"learning_rate": 1.7107309486780715e-06, |
|
"loss": 0.29765024, |
|
"memory(GiB)": 43.8, |
|
"step": 255, |
|
"train_speed(iter/s)": 0.169287 |
|
}, |
|
{ |
|
"acc": 0.90894642, |
|
"epoch": 0.38362227960162304, |
|
"grad_norm": 2.118992381164901, |
|
"learning_rate": 1.702954898911353e-06, |
|
"loss": 0.32009149, |
|
"memory(GiB)": 33.0, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.169108 |
|
}, |
|
{ |
|
"acc": 0.91895199, |
|
"epoch": 0.3909996311324235, |
|
"grad_norm": 1.8087446200238866, |
|
"learning_rate": 1.6951788491446344e-06, |
|
"loss": 0.28518291, |
|
"memory(GiB)": 33.64, |
|
"step": 265, |
|
"train_speed(iter/s)": 0.169659 |
|
}, |
|
{ |
|
"acc": 0.91831837, |
|
"epoch": 0.3983769826632239, |
|
"grad_norm": 2.295227865477349, |
|
"learning_rate": 1.6874027993779158e-06, |
|
"loss": 0.29493954, |
|
"memory(GiB)": 32.16, |
|
"step": 270, |
|
"train_speed(iter/s)": 0.16921 |
|
}, |
|
{ |
|
"acc": 0.91772842, |
|
"epoch": 0.40575433419402435, |
|
"grad_norm": 1.8335936104899577, |
|
"learning_rate": 1.6796267496111975e-06, |
|
"loss": 0.29295368, |
|
"memory(GiB)": 32.48, |
|
"step": 275, |
|
"train_speed(iter/s)": 0.169211 |
|
}, |
|
{ |
|
"acc": 0.9184288, |
|
"epoch": 0.4131316857248248, |
|
"grad_norm": 1.9183997806679902, |
|
"learning_rate": 1.671850699844479e-06, |
|
"loss": 0.29449196, |
|
"memory(GiB)": 32.65, |
|
"step": 280, |
|
"train_speed(iter/s)": 0.169821 |
|
}, |
|
{ |
|
"acc": 0.91275759, |
|
"epoch": 0.4205090372556252, |
|
"grad_norm": 1.5737005817463792, |
|
"learning_rate": 1.6640746500777604e-06, |
|
"loss": 0.30824404, |
|
"memory(GiB)": 32.27, |
|
"step": 285, |
|
"train_speed(iter/s)": 0.169618 |
|
}, |
|
{ |
|
"acc": 0.91761837, |
|
"epoch": 0.42788638878642565, |
|
"grad_norm": 1.6411868652328097, |
|
"learning_rate": 1.6562986003110419e-06, |
|
"loss": 0.28589807, |
|
"memory(GiB)": 33.9, |
|
"step": 290, |
|
"train_speed(iter/s)": 0.16978 |
|
}, |
|
{ |
|
"acc": 0.91096239, |
|
"epoch": 0.4352637403172261, |
|
"grad_norm": 1.4763719992796571, |
|
"learning_rate": 1.6485225505443235e-06, |
|
"loss": 0.31501875, |
|
"memory(GiB)": 33.9, |
|
"step": 295, |
|
"train_speed(iter/s)": 0.170116 |
|
}, |
|
{ |
|
"acc": 0.92102461, |
|
"epoch": 0.4426410918480266, |
|
"grad_norm": 1.7038633862826587, |
|
"learning_rate": 1.640746500777605e-06, |
|
"loss": 0.28700156, |
|
"memory(GiB)": 33.12, |
|
"step": 300, |
|
"train_speed(iter/s)": 0.16999 |
|
}, |
|
{ |
|
"epoch": 0.4426410918480266, |
|
"eval_acc": 0.904986426632376, |
|
"eval_loss": 0.28871360421180725, |
|
"eval_runtime": 8.8172, |
|
"eval_samples_per_second": 24.724, |
|
"eval_steps_per_second": 3.176, |
|
"step": 300 |
|
}, |
|
{ |
|
"acc": 0.9137413, |
|
"epoch": 0.450018443378827, |
|
"grad_norm": 1.5572757830459178, |
|
"learning_rate": 1.6329704510108864e-06, |
|
"loss": 0.3066596, |
|
"memory(GiB)": 44.77, |
|
"step": 305, |
|
"train_speed(iter/s)": 0.169643 |
|
}, |
|
{ |
|
"acc": 0.92225361, |
|
"epoch": 0.45739579490962745, |
|
"grad_norm": 1.7973596806557957, |
|
"learning_rate": 1.6251944012441679e-06, |
|
"loss": 0.28060098, |
|
"memory(GiB)": 34.38, |
|
"step": 310, |
|
"train_speed(iter/s)": 0.169469 |
|
}, |
|
{ |
|
"acc": 0.91542816, |
|
"epoch": 0.4647731464404279, |
|
"grad_norm": 1.7774091029439925, |
|
"learning_rate": 1.6174183514774493e-06, |
|
"loss": 0.29976537, |
|
"memory(GiB)": 33.81, |
|
"step": 315, |
|
"train_speed(iter/s)": 0.169523 |
|
}, |
|
{ |
|
"acc": 0.91291943, |
|
"epoch": 0.4721504979712283, |
|
"grad_norm": 1.3755306649838441, |
|
"learning_rate": 1.6096423017107308e-06, |
|
"loss": 0.30613976, |
|
"memory(GiB)": 33.81, |
|
"step": 320, |
|
"train_speed(iter/s)": 0.169769 |
|
}, |
|
{ |
|
"acc": 0.90916691, |
|
"epoch": 0.47952784950202876, |
|
"grad_norm": 1.9213831375809023, |
|
"learning_rate": 1.6018662519440122e-06, |
|
"loss": 0.32510529, |
|
"memory(GiB)": 34.44, |
|
"step": 325, |
|
"train_speed(iter/s)": 0.169545 |
|
}, |
|
{ |
|
"acc": 0.91636696, |
|
"epoch": 0.4869052010328292, |
|
"grad_norm": 1.8837685149781478, |
|
"learning_rate": 1.5940902021772939e-06, |
|
"loss": 0.30537646, |
|
"memory(GiB)": 31.2, |
|
"step": 330, |
|
"train_speed(iter/s)": 0.170038 |
|
}, |
|
{ |
|
"acc": 0.91307325, |
|
"epoch": 0.4942825525636297, |
|
"grad_norm": 1.8595782698159422, |
|
"learning_rate": 1.5863141524105753e-06, |
|
"loss": 0.30300996, |
|
"memory(GiB)": 30.74, |
|
"step": 335, |
|
"train_speed(iter/s)": 0.169983 |
|
}, |
|
{ |
|
"acc": 0.91927223, |
|
"epoch": 0.5016599040944301, |
|
"grad_norm": 1.8693944311229003, |
|
"learning_rate": 1.5785381026438568e-06, |
|
"loss": 0.28294766, |
|
"memory(GiB)": 31.5, |
|
"step": 340, |
|
"train_speed(iter/s)": 0.170169 |
|
}, |
|
{ |
|
"acc": 0.92018118, |
|
"epoch": 0.5090372556252305, |
|
"grad_norm": 1.6240951695142463, |
|
"learning_rate": 1.5707620528771385e-06, |
|
"loss": 0.27536349, |
|
"memory(GiB)": 32.84, |
|
"step": 345, |
|
"train_speed(iter/s)": 0.170494 |
|
}, |
|
{ |
|
"acc": 0.91428967, |
|
"epoch": 0.5164146071560309, |
|
"grad_norm": 2.0654305075288653, |
|
"learning_rate": 1.56298600311042e-06, |
|
"loss": 0.30193062, |
|
"memory(GiB)": 33.88, |
|
"step": 350, |
|
"train_speed(iter/s)": 0.170499 |
|
}, |
|
{ |
|
"epoch": 0.5164146071560309, |
|
"eval_acc": 0.906031218745535, |
|
"eval_loss": 0.2829771637916565, |
|
"eval_runtime": 8.9252, |
|
"eval_samples_per_second": 24.425, |
|
"eval_steps_per_second": 3.137, |
|
"step": 350 |
|
}, |
|
{ |
|
"acc": 0.92116051, |
|
"epoch": 0.5237919586868315, |
|
"grad_norm": 2.2709862324112136, |
|
"learning_rate": 1.5552099533437014e-06, |
|
"loss": 0.277144, |
|
"memory(GiB)": 44.05, |
|
"step": 355, |
|
"train_speed(iter/s)": 0.169773 |
|
}, |
|
{ |
|
"acc": 0.90278854, |
|
"epoch": 0.5311693102176319, |
|
"grad_norm": 1.9738153042801483, |
|
"learning_rate": 1.5474339035769828e-06, |
|
"loss": 0.33822517, |
|
"memory(GiB)": 31.78, |
|
"step": 360, |
|
"train_speed(iter/s)": 0.170163 |
|
}, |
|
{ |
|
"acc": 0.92497654, |
|
"epoch": 0.5385466617484324, |
|
"grad_norm": 1.2430005126419985, |
|
"learning_rate": 1.5396578538102643e-06, |
|
"loss": 0.26646669, |
|
"memory(GiB)": 33.8, |
|
"step": 365, |
|
"train_speed(iter/s)": 0.16992 |
|
}, |
|
{ |
|
"acc": 0.91328669, |
|
"epoch": 0.5459240132792328, |
|
"grad_norm": 1.732568460701246, |
|
"learning_rate": 1.5318818040435457e-06, |
|
"loss": 0.30124869, |
|
"memory(GiB)": 34.07, |
|
"step": 370, |
|
"train_speed(iter/s)": 0.170382 |
|
}, |
|
{ |
|
"acc": 0.91603355, |
|
"epoch": 0.5533013648100332, |
|
"grad_norm": 1.6627563648419381, |
|
"learning_rate": 1.5241057542768272e-06, |
|
"loss": 0.29759171, |
|
"memory(GiB)": 32.61, |
|
"step": 375, |
|
"train_speed(iter/s)": 0.170197 |
|
}, |
|
{ |
|
"acc": 0.90871716, |
|
"epoch": 0.5606787163408337, |
|
"grad_norm": 2.1331488669107492, |
|
"learning_rate": 1.5163297045101088e-06, |
|
"loss": 0.33630853, |
|
"memory(GiB)": 32.33, |
|
"step": 380, |
|
"train_speed(iter/s)": 0.17029 |
|
}, |
|
{ |
|
"acc": 0.90700073, |
|
"epoch": 0.5680560678716341, |
|
"grad_norm": 2.080763753555995, |
|
"learning_rate": 1.5085536547433903e-06, |
|
"loss": 0.325877, |
|
"memory(GiB)": 32.95, |
|
"step": 385, |
|
"train_speed(iter/s)": 0.170474 |
|
}, |
|
{ |
|
"acc": 0.91835623, |
|
"epoch": 0.5754334194024345, |
|
"grad_norm": 1.5911495384236254, |
|
"learning_rate": 1.500777604976672e-06, |
|
"loss": 0.28332872, |
|
"memory(GiB)": 31.78, |
|
"step": 390, |
|
"train_speed(iter/s)": 0.170283 |
|
}, |
|
{ |
|
"acc": 0.91712914, |
|
"epoch": 0.582810770933235, |
|
"grad_norm": 1.6237776507352246, |
|
"learning_rate": 1.4930015552099534e-06, |
|
"loss": 0.28782868, |
|
"memory(GiB)": 33.13, |
|
"step": 395, |
|
"train_speed(iter/s)": 0.170424 |
|
}, |
|
{ |
|
"acc": 0.92452984, |
|
"epoch": 0.5901881224640354, |
|
"grad_norm": 1.9617693211652296, |
|
"learning_rate": 1.4852255054432348e-06, |
|
"loss": 0.25721183, |
|
"memory(GiB)": 34.52, |
|
"step": 400, |
|
"train_speed(iter/s)": 0.170549 |
|
}, |
|
{ |
|
"epoch": 0.5901881224640354, |
|
"eval_acc": 0.9067634662094585, |
|
"eval_loss": 0.27780693769454956, |
|
"eval_runtime": 8.9713, |
|
"eval_samples_per_second": 24.3, |
|
"eval_steps_per_second": 3.121, |
|
"step": 400 |
|
}, |
|
{ |
|
"acc": 0.91402645, |
|
"epoch": 0.5975654739948358, |
|
"grad_norm": 1.6283342820719429, |
|
"learning_rate": 1.4774494556765163e-06, |
|
"loss": 0.29935551, |
|
"memory(GiB)": 43.79, |
|
"step": 405, |
|
"train_speed(iter/s)": 0.169655 |
|
}, |
|
{ |
|
"acc": 0.91232147, |
|
"epoch": 0.6049428255256363, |
|
"grad_norm": 1.7979698219270268, |
|
"learning_rate": 1.4696734059097977e-06, |
|
"loss": 0.29618566, |
|
"memory(GiB)": 34.75, |
|
"step": 410, |
|
"train_speed(iter/s)": 0.169867 |
|
}, |
|
{ |
|
"acc": 0.91495514, |
|
"epoch": 0.6123201770564367, |
|
"grad_norm": 1.400313093548897, |
|
"learning_rate": 1.4618973561430792e-06, |
|
"loss": 0.30076814, |
|
"memory(GiB)": 33.36, |
|
"step": 415, |
|
"train_speed(iter/s)": 0.169686 |
|
}, |
|
{ |
|
"acc": 0.91793385, |
|
"epoch": 0.6196975285872371, |
|
"grad_norm": 1.5440217170439645, |
|
"learning_rate": 1.4541213063763606e-06, |
|
"loss": 0.27723732, |
|
"memory(GiB)": 32.03, |
|
"step": 420, |
|
"train_speed(iter/s)": 0.169706 |
|
}, |
|
{ |
|
"acc": 0.92025652, |
|
"epoch": 0.6270748801180376, |
|
"grad_norm": 1.7171089334482643, |
|
"learning_rate": 1.446345256609642e-06, |
|
"loss": 0.28218346, |
|
"memory(GiB)": 31.84, |
|
"step": 425, |
|
"train_speed(iter/s)": 0.169824 |
|
}, |
|
{ |
|
"acc": 0.91456184, |
|
"epoch": 0.634452231648838, |
|
"grad_norm": 1.7617810648771757, |
|
"learning_rate": 1.4385692068429238e-06, |
|
"loss": 0.30232787, |
|
"memory(GiB)": 33.01, |
|
"step": 430, |
|
"train_speed(iter/s)": 0.169549 |
|
}, |
|
{ |
|
"acc": 0.91554451, |
|
"epoch": 0.6418295831796386, |
|
"grad_norm": 2.1102714988825966, |
|
"learning_rate": 1.4307931570762052e-06, |
|
"loss": 0.29879627, |
|
"memory(GiB)": 33.18, |
|
"step": 435, |
|
"train_speed(iter/s)": 0.169677 |
|
}, |
|
{ |
|
"acc": 0.92126179, |
|
"epoch": 0.649206934710439, |
|
"grad_norm": 2.046949703950944, |
|
"learning_rate": 1.4230171073094869e-06, |
|
"loss": 0.27905126, |
|
"memory(GiB)": 35.07, |
|
"step": 440, |
|
"train_speed(iter/s)": 0.169605 |
|
}, |
|
{ |
|
"acc": 0.90152893, |
|
"epoch": 0.6565842862412394, |
|
"grad_norm": 2.001971595085909, |
|
"learning_rate": 1.4152410575427683e-06, |
|
"loss": 0.34060516, |
|
"memory(GiB)": 33.51, |
|
"step": 445, |
|
"train_speed(iter/s)": 0.169689 |
|
}, |
|
{ |
|
"acc": 0.91629639, |
|
"epoch": 0.6639616377720399, |
|
"grad_norm": 2.0397672790155528, |
|
"learning_rate": 1.4074650077760498e-06, |
|
"loss": 0.28595252, |
|
"memory(GiB)": 34.12, |
|
"step": 450, |
|
"train_speed(iter/s)": 0.170047 |
|
}, |
|
{ |
|
"epoch": 0.6639616377720399, |
|
"eval_acc": 0.9078082583226175, |
|
"eval_loss": 0.2715848386287689, |
|
"eval_runtime": 8.8964, |
|
"eval_samples_per_second": 24.504, |
|
"eval_steps_per_second": 3.147, |
|
"step": 450 |
|
}, |
|
{ |
|
"acc": 0.92627125, |
|
"epoch": 0.6713389893028403, |
|
"grad_norm": 1.6378143906534044, |
|
"learning_rate": 1.3996889580093312e-06, |
|
"loss": 0.25918436, |
|
"memory(GiB)": 43.88, |
|
"step": 455, |
|
"train_speed(iter/s)": 0.169369 |
|
}, |
|
{ |
|
"acc": 0.91979427, |
|
"epoch": 0.6787163408336407, |
|
"grad_norm": 1.7082862687854972, |
|
"learning_rate": 1.3919129082426127e-06, |
|
"loss": 0.27077117, |
|
"memory(GiB)": 32.33, |
|
"step": 460, |
|
"train_speed(iter/s)": 0.169438 |
|
}, |
|
{ |
|
"acc": 0.91361713, |
|
"epoch": 0.6860936923644412, |
|
"grad_norm": 2.293000555161464, |
|
"learning_rate": 1.3841368584758941e-06, |
|
"loss": 0.30449131, |
|
"memory(GiB)": 32.93, |
|
"step": 465, |
|
"train_speed(iter/s)": 0.169581 |
|
}, |
|
{ |
|
"acc": 0.91954422, |
|
"epoch": 0.6934710438952416, |
|
"grad_norm": 1.8478883729217541, |
|
"learning_rate": 1.3763608087091756e-06, |
|
"loss": 0.29147563, |
|
"memory(GiB)": 32.32, |
|
"step": 470, |
|
"train_speed(iter/s)": 0.169425 |
|
}, |
|
{ |
|
"acc": 0.91925821, |
|
"epoch": 0.700848395426042, |
|
"grad_norm": 2.1771276083255833, |
|
"learning_rate": 1.368584758942457e-06, |
|
"loss": 0.27578421, |
|
"memory(GiB)": 31.55, |
|
"step": 475, |
|
"train_speed(iter/s)": 0.169717 |
|
}, |
|
{ |
|
"acc": 0.91978226, |
|
"epoch": 0.7082257469568425, |
|
"grad_norm": 1.5525703471804124, |
|
"learning_rate": 1.3608087091757387e-06, |
|
"loss": 0.28457327, |
|
"memory(GiB)": 34.35, |
|
"step": 480, |
|
"train_speed(iter/s)": 0.169473 |
|
}, |
|
{ |
|
"acc": 0.91358566, |
|
"epoch": 0.7156030984876429, |
|
"grad_norm": 1.6094545899681876, |
|
"learning_rate": 1.3530326594090201e-06, |
|
"loss": 0.29641771, |
|
"memory(GiB)": 34.35, |
|
"step": 485, |
|
"train_speed(iter/s)": 0.169292 |
|
}, |
|
{ |
|
"acc": 0.9157114, |
|
"epoch": 0.7229804500184434, |
|
"grad_norm": 2.001462148706446, |
|
"learning_rate": 1.3452566096423018e-06, |
|
"loss": 0.30091541, |
|
"memory(GiB)": 33.0, |
|
"step": 490, |
|
"train_speed(iter/s)": 0.169539 |
|
}, |
|
{ |
|
"acc": 0.9181448, |
|
"epoch": 0.7303578015492438, |
|
"grad_norm": 1.933852376850104, |
|
"learning_rate": 1.3374805598755833e-06, |
|
"loss": 0.28622799, |
|
"memory(GiB)": 31.96, |
|
"step": 495, |
|
"train_speed(iter/s)": 0.169315 |
|
}, |
|
{ |
|
"acc": 0.91473122, |
|
"epoch": 0.7377351530800442, |
|
"grad_norm": 1.9036456322193762, |
|
"learning_rate": 1.3297045101088647e-06, |
|
"loss": 0.3094301, |
|
"memory(GiB)": 31.84, |
|
"step": 500, |
|
"train_speed(iter/s)": 0.169482 |
|
}, |
|
{ |
|
"epoch": 0.7377351530800442, |
|
"eval_acc": 0.9090048578368338, |
|
"eval_loss": 0.2688305675983429, |
|
"eval_runtime": 8.8274, |
|
"eval_samples_per_second": 24.696, |
|
"eval_steps_per_second": 3.172, |
|
"step": 500 |
|
}, |
|
{ |
|
"acc": 0.91458435, |
|
"epoch": 0.7451125046108447, |
|
"grad_norm": 1.9335752594206985, |
|
"learning_rate": 1.3219284603421462e-06, |
|
"loss": 0.29494238, |
|
"memory(GiB)": 43.4, |
|
"step": 505, |
|
"train_speed(iter/s)": 0.168821 |
|
}, |
|
{ |
|
"acc": 0.9221386, |
|
"epoch": 0.7524898561416451, |
|
"grad_norm": 1.8197097143608403, |
|
"learning_rate": 1.3141524105754276e-06, |
|
"loss": 0.2647439, |
|
"memory(GiB)": 33.36, |
|
"step": 510, |
|
"train_speed(iter/s)": 0.168682 |
|
}, |
|
{ |
|
"acc": 0.92193203, |
|
"epoch": 0.7598672076724456, |
|
"grad_norm": 1.901554742963865, |
|
"learning_rate": 1.306376360808709e-06, |
|
"loss": 0.27191839, |
|
"memory(GiB)": 30.47, |
|
"step": 515, |
|
"train_speed(iter/s)": 0.168924 |
|
}, |
|
{ |
|
"acc": 0.91413088, |
|
"epoch": 0.7672445592032461, |
|
"grad_norm": 2.0670792917636236, |
|
"learning_rate": 1.2986003110419905e-06, |
|
"loss": 0.296503, |
|
"memory(GiB)": 32.43, |
|
"step": 520, |
|
"train_speed(iter/s)": 0.168732 |
|
}, |
|
{ |
|
"acc": 0.92014456, |
|
"epoch": 0.7746219107340465, |
|
"grad_norm": 1.3940992355499904, |
|
"learning_rate": 1.290824261275272e-06, |
|
"loss": 0.27345006, |
|
"memory(GiB)": 31.88, |
|
"step": 525, |
|
"train_speed(iter/s)": 0.168564 |
|
}, |
|
{ |
|
"acc": 0.91787033, |
|
"epoch": 0.781999262264847, |
|
"grad_norm": 1.7528498159038246, |
|
"learning_rate": 1.2830482115085536e-06, |
|
"loss": 0.27718287, |
|
"memory(GiB)": 32.83, |
|
"step": 530, |
|
"train_speed(iter/s)": 0.168633 |
|
}, |
|
{ |
|
"acc": 0.91950254, |
|
"epoch": 0.7893766137956474, |
|
"grad_norm": 1.6045395248629215, |
|
"learning_rate": 1.275272161741835e-06, |
|
"loss": 0.27553134, |
|
"memory(GiB)": 30.99, |
|
"step": 535, |
|
"train_speed(iter/s)": 0.168504 |
|
}, |
|
{ |
|
"acc": 0.91442375, |
|
"epoch": 0.7967539653264478, |
|
"grad_norm": 2.0480557410695686, |
|
"learning_rate": 1.2674961119751167e-06, |
|
"loss": 0.29672928, |
|
"memory(GiB)": 32.9, |
|
"step": 540, |
|
"train_speed(iter/s)": 0.168746 |
|
}, |
|
{ |
|
"acc": 0.91783228, |
|
"epoch": 0.8041313168572483, |
|
"grad_norm": 1.7063380836356228, |
|
"learning_rate": 1.2597200622083982e-06, |
|
"loss": 0.28551073, |
|
"memory(GiB)": 32.64, |
|
"step": 545, |
|
"train_speed(iter/s)": 0.168632 |
|
}, |
|
{ |
|
"acc": 0.91965294, |
|
"epoch": 0.8115086683880487, |
|
"grad_norm": 1.8091430299196016, |
|
"learning_rate": 1.2519440124416796e-06, |
|
"loss": 0.28367462, |
|
"memory(GiB)": 33.12, |
|
"step": 550, |
|
"train_speed(iter/s)": 0.168537 |
|
}, |
|
{ |
|
"epoch": 0.8115086683880487, |
|
"eval_acc": 0.9094959994284898, |
|
"eval_loss": 0.265609472990036, |
|
"eval_runtime": 8.9354, |
|
"eval_samples_per_second": 24.397, |
|
"eval_steps_per_second": 3.134, |
|
"step": 550 |
|
}, |
|
{ |
|
"acc": 0.91708422, |
|
"epoch": 0.8188860199188491, |
|
"grad_norm": 1.9338041082162762, |
|
"learning_rate": 1.244167962674961e-06, |
|
"loss": 0.30288501, |
|
"memory(GiB)": 44.46, |
|
"step": 555, |
|
"train_speed(iter/s)": 0.168246 |
|
}, |
|
{ |
|
"acc": 0.91793032, |
|
"epoch": 0.8262633714496496, |
|
"grad_norm": 1.960186880981984, |
|
"learning_rate": 1.2363919129082425e-06, |
|
"loss": 0.29391913, |
|
"memory(GiB)": 33.02, |
|
"step": 560, |
|
"train_speed(iter/s)": 0.168119 |
|
}, |
|
{ |
|
"acc": 0.92976294, |
|
"epoch": 0.83364072298045, |
|
"grad_norm": 1.7220525036525174, |
|
"learning_rate": 1.228615863141524e-06, |
|
"loss": 0.24753182, |
|
"memory(GiB)": 32.77, |
|
"step": 565, |
|
"train_speed(iter/s)": 0.16819 |
|
}, |
|
{ |
|
"acc": 0.9202878, |
|
"epoch": 0.8410180745112504, |
|
"grad_norm": 1.9681280144249207, |
|
"learning_rate": 1.2208398133748054e-06, |
|
"loss": 0.27648234, |
|
"memory(GiB)": 32.36, |
|
"step": 570, |
|
"train_speed(iter/s)": 0.168331 |
|
}, |
|
{ |
|
"acc": 0.91870079, |
|
"epoch": 0.8483954260420509, |
|
"grad_norm": 1.6402903494642216, |
|
"learning_rate": 1.2130637636080869e-06, |
|
"loss": 0.29140263, |
|
"memory(GiB)": 35.18, |
|
"step": 575, |
|
"train_speed(iter/s)": 0.168255 |
|
}, |
|
{ |
|
"acc": 0.91364193, |
|
"epoch": 0.8557727775728513, |
|
"grad_norm": 2.146651599757078, |
|
"learning_rate": 1.2052877138413686e-06, |
|
"loss": 0.31224487, |
|
"memory(GiB)": 37.43, |
|
"step": 580, |
|
"train_speed(iter/s)": 0.168463 |
|
}, |
|
{ |
|
"acc": 0.92091951, |
|
"epoch": 0.8631501291036517, |
|
"grad_norm": 2.110687395796676, |
|
"learning_rate": 1.19751166407465e-06, |
|
"loss": 0.27074888, |
|
"memory(GiB)": 30.34, |
|
"step": 585, |
|
"train_speed(iter/s)": 0.16837 |
|
}, |
|
{ |
|
"acc": 0.92361298, |
|
"epoch": 0.8705274806344522, |
|
"grad_norm": 1.341809177582426, |
|
"learning_rate": 1.1897356143079317e-06, |
|
"loss": 0.26371779, |
|
"memory(GiB)": 32.35, |
|
"step": 590, |
|
"train_speed(iter/s)": 0.168375 |
|
}, |
|
{ |
|
"acc": 0.92123985, |
|
"epoch": 0.8779048321652527, |
|
"grad_norm": 1.8270563745834436, |
|
"learning_rate": 1.1819595645412131e-06, |
|
"loss": 0.26702247, |
|
"memory(GiB)": 34.77, |
|
"step": 595, |
|
"train_speed(iter/s)": 0.168532 |
|
}, |
|
{ |
|
"acc": 0.91653709, |
|
"epoch": 0.8852821836960532, |
|
"grad_norm": 1.6527432011832037, |
|
"learning_rate": 1.1741835147744946e-06, |
|
"loss": 0.29842911, |
|
"memory(GiB)": 33.87, |
|
"step": 600, |
|
"train_speed(iter/s)": 0.168424 |
|
}, |
|
{ |
|
"epoch": 0.8852821836960532, |
|
"eval_acc": 0.9105765109301329, |
|
"eval_loss": 0.2623133361339569, |
|
"eval_runtime": 8.7796, |
|
"eval_samples_per_second": 24.83, |
|
"eval_steps_per_second": 3.189, |
|
"step": 600 |
|
}, |
|
{ |
|
"acc": 0.91810665, |
|
"epoch": 0.8926595352268536, |
|
"grad_norm": 1.3239706750197222, |
|
"learning_rate": 1.166407465007776e-06, |
|
"loss": 0.29543982, |
|
"memory(GiB)": 43.63, |
|
"step": 605, |
|
"train_speed(iter/s)": 0.16811 |
|
}, |
|
{ |
|
"acc": 0.92373562, |
|
"epoch": 0.900036886757654, |
|
"grad_norm": 1.589090709862595, |
|
"learning_rate": 1.1586314152410575e-06, |
|
"loss": 0.27000737, |
|
"memory(GiB)": 32.08, |
|
"step": 610, |
|
"train_speed(iter/s)": 0.168111 |
|
}, |
|
{ |
|
"acc": 0.92571859, |
|
"epoch": 0.9074142382884545, |
|
"grad_norm": 1.786690071917202, |
|
"learning_rate": 1.150855365474339e-06, |
|
"loss": 0.26558821, |
|
"memory(GiB)": 34.26, |
|
"step": 615, |
|
"train_speed(iter/s)": 0.167944 |
|
}, |
|
{ |
|
"acc": 0.92350941, |
|
"epoch": 0.9147915898192549, |
|
"grad_norm": 1.4482760998007842, |
|
"learning_rate": 1.1430793157076204e-06, |
|
"loss": 0.27038224, |
|
"memory(GiB)": 32.87, |
|
"step": 620, |
|
"train_speed(iter/s)": 0.168075 |
|
}, |
|
{ |
|
"acc": 0.92567997, |
|
"epoch": 0.9221689413500553, |
|
"grad_norm": 1.5651995631831526, |
|
"learning_rate": 1.1353032659409018e-06, |
|
"loss": 0.25891747, |
|
"memory(GiB)": 32.63, |
|
"step": 625, |
|
"train_speed(iter/s)": 0.168015 |
|
}, |
|
{ |
|
"acc": 0.91823616, |
|
"epoch": 0.9295462928808558, |
|
"grad_norm": 1.4462434724962336, |
|
"learning_rate": 1.1275272161741835e-06, |
|
"loss": 0.2788033, |
|
"memory(GiB)": 38.22, |
|
"step": 630, |
|
"train_speed(iter/s)": 0.167998 |
|
}, |
|
{ |
|
"acc": 0.92322083, |
|
"epoch": 0.9369236444116562, |
|
"grad_norm": 1.4194043988299254, |
|
"learning_rate": 1.119751166407465e-06, |
|
"loss": 0.26030297, |
|
"memory(GiB)": 32.29, |
|
"step": 635, |
|
"train_speed(iter/s)": 0.168162 |
|
}, |
|
{ |
|
"acc": 0.92457771, |
|
"epoch": 0.9443009959424566, |
|
"grad_norm": 1.8304569462755849, |
|
"learning_rate": 1.1119751166407466e-06, |
|
"loss": 0.27183619, |
|
"memory(GiB)": 35.33, |
|
"step": 640, |
|
"train_speed(iter/s)": 0.168086 |
|
}, |
|
{ |
|
"acc": 0.9201807, |
|
"epoch": 0.9516783474732571, |
|
"grad_norm": 1.6355541683467607, |
|
"learning_rate": 1.104199066874028e-06, |
|
"loss": 0.27730408, |
|
"memory(GiB)": 31.4, |
|
"step": 645, |
|
"train_speed(iter/s)": 0.168284 |
|
}, |
|
{ |
|
"acc": 0.92337418, |
|
"epoch": 0.9590556990040575, |
|
"grad_norm": 1.6309155055635356, |
|
"learning_rate": 1.0964230171073095e-06, |
|
"loss": 0.25860276, |
|
"memory(GiB)": 32.67, |
|
"step": 650, |
|
"train_speed(iter/s)": 0.168267 |
|
}, |
|
{ |
|
"epoch": 0.9590556990040575, |
|
"eval_acc": 0.9113176882411773, |
|
"eval_loss": 0.2569684386253357, |
|
"eval_runtime": 8.8598, |
|
"eval_samples_per_second": 24.605, |
|
"eval_steps_per_second": 3.16, |
|
"step": 650 |
|
}, |
|
{ |
|
"acc": 0.91919975, |
|
"epoch": 0.966433050534858, |
|
"grad_norm": 1.482378816274918, |
|
"learning_rate": 1.088646967340591e-06, |
|
"loss": 0.28527048, |
|
"memory(GiB)": 45.59, |
|
"step": 655, |
|
"train_speed(iter/s)": 0.167772 |
|
}, |
|
{ |
|
"acc": 0.92037735, |
|
"epoch": 0.9738104020656584, |
|
"grad_norm": 2.2165369625767712, |
|
"learning_rate": 1.0808709175738724e-06, |
|
"loss": 0.28198528, |
|
"memory(GiB)": 32.93, |
|
"step": 660, |
|
"train_speed(iter/s)": 0.16789 |
|
}, |
|
{ |
|
"acc": 0.92200727, |
|
"epoch": 0.9811877535964588, |
|
"grad_norm": 1.7151646172394919, |
|
"learning_rate": 1.0730948678071539e-06, |
|
"loss": 0.27098572, |
|
"memory(GiB)": 33.1, |
|
"step": 665, |
|
"train_speed(iter/s)": 0.167862 |
|
}, |
|
{ |
|
"acc": 0.92197828, |
|
"epoch": 0.9885651051272594, |
|
"grad_norm": 2.076606131505725, |
|
"learning_rate": 1.0653188180404353e-06, |
|
"loss": 0.26747627, |
|
"memory(GiB)": 34.45, |
|
"step": 670, |
|
"train_speed(iter/s)": 0.167945 |
|
}, |
|
{ |
|
"acc": 0.92063084, |
|
"epoch": 0.9959424566580598, |
|
"grad_norm": 1.7465662806523121, |
|
"learning_rate": 1.0575427682737168e-06, |
|
"loss": 0.27087922, |
|
"memory(GiB)": 39.51, |
|
"step": 675, |
|
"train_speed(iter/s)": 0.167951 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1354, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 66000591650816.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|