| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 660, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 0.796875, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 0.1258, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.69140625, |
| "learning_rate": 8.181818181818181e-06, |
| "loss": 0.1212, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.421875, |
| "learning_rate": 1.2727272727272728e-05, |
| "loss": 0.1055, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 1.7272727272727274e-05, |
| "loss": 0.0963, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.2392578125, |
| "learning_rate": 2.1818181818181818e-05, |
| "loss": 0.0931, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.1796875, |
| "learning_rate": 2.6363636363636365e-05, |
| "loss": 0.0903, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.0606060606060606, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 2.9999830539872836e-05, |
| "loss": 0.0843, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.2121212121212122, |
| "grad_norm": 0.150390625, |
| "learning_rate": 2.9993899882114902e-05, |
| "loss": 0.0853, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.138671875, |
| "learning_rate": 2.997950047184977e-05, |
| "loss": 0.0804, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 0.146484375, |
| "learning_rate": 2.9956641346126986e-05, |
| "loss": 0.0809, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 2.9925336851301575e-05, |
| "loss": 0.0795, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.9885606634030267e-05, |
| "loss": 0.0789, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.9696969696969697, |
| "grad_norm": 0.134765625, |
| "learning_rate": 2.98374756289413e-05, |
| "loss": 0.0778, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.121212121212121, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 2.9780974042985506e-05, |
| "loss": 0.0761, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.2177734375, |
| "learning_rate": 2.971613733647841e-05, |
| "loss": 0.0751, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.4242424242424243, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 2.9643006200845458e-05, |
| "loss": 0.0756, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.5757575757575757, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.9561626533084068e-05, |
| "loss": 0.0765, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 2.9472049406958788e-05, |
| "loss": 0.0746, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.878787878787879, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 2.937433104094746e-05, |
| "loss": 0.0757, |
| "step": 95 |
| }, |
| { |
| "epoch": 3.0303030303030303, |
| "grad_norm": 0.1328125, |
| "learning_rate": 2.9268532762958568e-05, |
| "loss": 0.0725, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.1818181818181817, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.915472097184196e-05, |
| "loss": 0.0742, |
| "step": 105 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 2.903296709571698e-05, |
| "loss": 0.0707, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.484848484848485, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 2.8903347547144327e-05, |
| "loss": 0.0734, |
| "step": 115 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.876594367516961e-05, |
| "loss": 0.0724, |
| "step": 120 |
| }, |
| { |
| "epoch": 3.787878787878788, |
| "grad_norm": 0.1318359375, |
| "learning_rate": 2.8620841714268804e-05, |
| "loss": 0.0725, |
| "step": 125 |
| }, |
| { |
| "epoch": 3.9393939393939394, |
| "grad_norm": 0.1484375, |
| "learning_rate": 2.846813273022764e-05, |
| "loss": 0.0714, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.090909090909091, |
| "grad_norm": 0.134765625, |
| "learning_rate": 2.83079125629888e-05, |
| "loss": 0.0727, |
| "step": 135 |
| }, |
| { |
| "epoch": 4.242424242424242, |
| "grad_norm": 0.146484375, |
| "learning_rate": 2.8140281766502957e-05, |
| "loss": 0.0716, |
| "step": 140 |
| }, |
| { |
| "epoch": 4.393939393939394, |
| "grad_norm": 0.140625, |
| "learning_rate": 2.7965345545621217e-05, |
| "loss": 0.072, |
| "step": 145 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 2.7783213690068737e-05, |
| "loss": 0.0701, |
| "step": 150 |
| }, |
| { |
| "epoch": 4.696969696969697, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 2.7594000505540807e-05, |
| "loss": 0.0741, |
| "step": 155 |
| }, |
| { |
| "epoch": 4.848484848484849, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.7397824741964805e-05, |
| "loss": 0.0665, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.189453125, |
| "learning_rate": 2.7194809518972856e-05, |
| "loss": 0.0705, |
| "step": 165 |
| }, |
| { |
| "epoch": 5.151515151515151, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 2.6985082248632174e-05, |
| "loss": 0.0679, |
| "step": 170 |
| }, |
| { |
| "epoch": 5.303030303030303, |
| "grad_norm": 0.14453125, |
| "learning_rate": 2.676877455548141e-05, |
| "loss": 0.0693, |
| "step": 175 |
| }, |
| { |
| "epoch": 5.454545454545454, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.6546022193923274e-05, |
| "loss": 0.0696, |
| "step": 180 |
| }, |
| { |
| "epoch": 5.606060606060606, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 2.631696496302526e-05, |
| "loss": 0.0709, |
| "step": 185 |
| }, |
| { |
| "epoch": 5.757575757575758, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.6081746618781953e-05, |
| "loss": 0.0694, |
| "step": 190 |
| }, |
| { |
| "epoch": 5.909090909090909, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 2.584051478389399e-05, |
| "loss": 0.0682, |
| "step": 195 |
| }, |
| { |
| "epoch": 6.0606060606060606, |
| "grad_norm": 0.154296875, |
| "learning_rate": 2.559342085512022e-05, |
| "loss": 0.0686, |
| "step": 200 |
| }, |
| { |
| "epoch": 6.212121212121212, |
| "grad_norm": 0.154296875, |
| "learning_rate": 2.5340619908261352e-05, |
| "loss": 0.0703, |
| "step": 205 |
| }, |
| { |
| "epoch": 6.363636363636363, |
| "grad_norm": 0.140625, |
| "learning_rate": 2.508227060083457e-05, |
| "loss": 0.0647, |
| "step": 210 |
| }, |
| { |
| "epoch": 6.515151515151516, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.4818535072500327e-05, |
| "loss": 0.064, |
| "step": 215 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.142578125, |
| "learning_rate": 2.4549578843303708e-05, |
| "loss": 0.0676, |
| "step": 220 |
| }, |
| { |
| "epoch": 6.818181818181818, |
| "grad_norm": 0.14453125, |
| "learning_rate": 2.427557070979427e-05, |
| "loss": 0.0669, |
| "step": 225 |
| }, |
| { |
| "epoch": 6.96969696969697, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 2.399668263908961e-05, |
| "loss": 0.0679, |
| "step": 230 |
| }, |
| { |
| "epoch": 7.121212121212121, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 2.3713089660948985e-05, |
| "loss": 0.0666, |
| "step": 235 |
| }, |
| { |
| "epoch": 7.2727272727272725, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 2.342496975792494e-05, |
| "loss": 0.066, |
| "step": 240 |
| }, |
| { |
| "epoch": 7.424242424242424, |
| "grad_norm": 0.123046875, |
| "learning_rate": 2.313250375366167e-05, |
| "loss": 0.0637, |
| "step": 245 |
| }, |
| { |
| "epoch": 7.575757575757576, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 2.283587519941036e-05, |
| "loss": 0.0683, |
| "step": 250 |
| }, |
| { |
| "epoch": 7.7272727272727275, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 2.253527025883271e-05, |
| "loss": 0.0642, |
| "step": 255 |
| }, |
| { |
| "epoch": 7.878787878787879, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 2.2230877591164858e-05, |
| "loss": 0.0682, |
| "step": 260 |
| }, |
| { |
| "epoch": 8.030303030303031, |
| "grad_norm": 0.1328125, |
| "learning_rate": 2.192288823281509e-05, |
| "loss": 0.0628, |
| "step": 265 |
| }, |
| { |
| "epoch": 8.181818181818182, |
| "grad_norm": 0.158203125, |
| "learning_rate": 2.1611495477469712e-05, |
| "loss": 0.0635, |
| "step": 270 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.15234375, |
| "learning_rate": 2.1296894754782155e-05, |
| "loss": 0.0679, |
| "step": 275 |
| }, |
| { |
| "epoch": 8.484848484848484, |
| "grad_norm": 0.140625, |
| "learning_rate": 2.0979283507721653e-05, |
| "loss": 0.0631, |
| "step": 280 |
| }, |
| { |
| "epoch": 8.636363636363637, |
| "grad_norm": 0.12890625, |
| "learning_rate": 2.0658861068658254e-05, |
| "loss": 0.0634, |
| "step": 285 |
| }, |
| { |
| "epoch": 8.787878787878787, |
| "grad_norm": 0.14453125, |
| "learning_rate": 2.0335828534262148e-05, |
| "loss": 0.0652, |
| "step": 290 |
| }, |
| { |
| "epoch": 8.93939393939394, |
| "grad_norm": 0.1484375, |
| "learning_rate": 2.001038863929568e-05, |
| "loss": 0.067, |
| "step": 295 |
| }, |
| { |
| "epoch": 9.090909090909092, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 1.9682745629377267e-05, |
| "loss": 0.0647, |
| "step": 300 |
| }, |
| { |
| "epoch": 9.242424242424242, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 1.9353105132797175e-05, |
| "loss": 0.0628, |
| "step": 305 |
| }, |
| { |
| "epoch": 9.393939393939394, |
| "grad_norm": 0.140625, |
| "learning_rate": 1.902167403146548e-05, |
| "loss": 0.0625, |
| "step": 310 |
| }, |
| { |
| "epoch": 9.545454545454545, |
| "grad_norm": 0.1728515625, |
| "learning_rate": 1.8688660331073253e-05, |
| "loss": 0.0634, |
| "step": 315 |
| }, |
| { |
| "epoch": 9.696969696969697, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 1.8354273030548512e-05, |
| "loss": 0.0618, |
| "step": 320 |
| }, |
| { |
| "epoch": 9.848484848484848, |
| "grad_norm": 0.14453125, |
| "learning_rate": 1.801872199088878e-05, |
| "loss": 0.0618, |
| "step": 325 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.1630859375, |
| "learning_rate": 1.7682217803452616e-05, |
| "loss": 0.0633, |
| "step": 330 |
| }, |
| { |
| "epoch": 10.151515151515152, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.7344971657792768e-05, |
| "loss": 0.0651, |
| "step": 335 |
| }, |
| { |
| "epoch": 10.303030303030303, |
| "grad_norm": 0.15625, |
| "learning_rate": 1.7007195209113934e-05, |
| "loss": 0.0623, |
| "step": 340 |
| }, |
| { |
| "epoch": 10.454545454545455, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 1.666910044543822e-05, |
| "loss": 0.0647, |
| "step": 345 |
| }, |
| { |
| "epoch": 10.606060606060606, |
| "grad_norm": 0.12890625, |
| "learning_rate": 1.6330899554561785e-05, |
| "loss": 0.0635, |
| "step": 350 |
| }, |
| { |
| "epoch": 10.757575757575758, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 1.5992804790886075e-05, |
| "loss": 0.0622, |
| "step": 355 |
| }, |
| { |
| "epoch": 10.909090909090908, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 1.5655028342207235e-05, |
| "loss": 0.0646, |
| "step": 360 |
| }, |
| { |
| "epoch": 11.06060606060606, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 1.5317782196547387e-05, |
| "loss": 0.0638, |
| "step": 365 |
| }, |
| { |
| "epoch": 11.212121212121213, |
| "grad_norm": 0.146484375, |
| "learning_rate": 1.4981278009111222e-05, |
| "loss": 0.0633, |
| "step": 370 |
| }, |
| { |
| "epoch": 11.363636363636363, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.4645726969451489e-05, |
| "loss": 0.0602, |
| "step": 375 |
| }, |
| { |
| "epoch": 11.515151515151516, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 1.4311339668926748e-05, |
| "loss": 0.061, |
| "step": 380 |
| }, |
| { |
| "epoch": 11.666666666666666, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 1.397832596853452e-05, |
| "loss": 0.0636, |
| "step": 385 |
| }, |
| { |
| "epoch": 11.818181818181818, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 1.3646894867202821e-05, |
| "loss": 0.0605, |
| "step": 390 |
| }, |
| { |
| "epoch": 11.969696969696969, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 1.3317254370622732e-05, |
| "loss": 0.0642, |
| "step": 395 |
| }, |
| { |
| "epoch": 12.121212121212121, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 1.298961136070432e-05, |
| "loss": 0.0633, |
| "step": 400 |
| }, |
| { |
| "epoch": 12.272727272727273, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 1.266417146573785e-05, |
| "loss": 0.0605, |
| "step": 405 |
| }, |
| { |
| "epoch": 12.424242424242424, |
| "grad_norm": 0.146484375, |
| "learning_rate": 1.2341138931341752e-05, |
| "loss": 0.0627, |
| "step": 410 |
| }, |
| { |
| "epoch": 12.575757575757576, |
| "grad_norm": 0.16015625, |
| "learning_rate": 1.2020716492278353e-05, |
| "loss": 0.0628, |
| "step": 415 |
| }, |
| { |
| "epoch": 12.727272727272727, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 1.1703105245217848e-05, |
| "loss": 0.0598, |
| "step": 420 |
| }, |
| { |
| "epoch": 12.878787878787879, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 1.1388504522530296e-05, |
| "loss": 0.0611, |
| "step": 425 |
| }, |
| { |
| "epoch": 13.030303030303031, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 1.1077111767184916e-05, |
| "loss": 0.0638, |
| "step": 430 |
| }, |
| { |
| "epoch": 13.181818181818182, |
| "grad_norm": 0.1376953125, |
| "learning_rate": 1.0769122408835148e-05, |
| "loss": 0.0585, |
| "step": 435 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 1.0464729741167291e-05, |
| "loss": 0.0635, |
| "step": 440 |
| }, |
| { |
| "epoch": 13.484848484848484, |
| "grad_norm": 0.14453125, |
| "learning_rate": 1.016412480058964e-05, |
| "loss": 0.0621, |
| "step": 445 |
| }, |
| { |
| "epoch": 13.636363636363637, |
| "grad_norm": 0.1552734375, |
| "learning_rate": 9.86749624633833e-06, |
| "loss": 0.0635, |
| "step": 450 |
| }, |
| { |
| "epoch": 13.787878787878787, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 9.575030242075062e-06, |
| "loss": 0.0597, |
| "step": 455 |
| }, |
| { |
| "epoch": 13.93939393939394, |
| "grad_norm": 0.1484375, |
| "learning_rate": 9.286910339051015e-06, |
| "loss": 0.0659, |
| "step": 460 |
| }, |
| { |
| "epoch": 14.090909090909092, |
| "grad_norm": 0.140625, |
| "learning_rate": 9.003317360910392e-06, |
| "loss": 0.0618, |
| "step": 465 |
| }, |
| { |
| "epoch": 14.242424242424242, |
| "grad_norm": 0.14453125, |
| "learning_rate": 8.724429290205732e-06, |
| "loss": 0.0612, |
| "step": 470 |
| }, |
| { |
| "epoch": 14.393939393939394, |
| "grad_norm": 0.142578125, |
| "learning_rate": 8.450421156696298e-06, |
| "loss": 0.0615, |
| "step": 475 |
| }, |
| { |
| "epoch": 14.545454545454545, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 8.181464927499674e-06, |
| "loss": 0.0591, |
| "step": 480 |
| }, |
| { |
| "epoch": 14.696969696969697, |
| "grad_norm": 0.15234375, |
| "learning_rate": 7.917729399165435e-06, |
| "loss": 0.0606, |
| "step": 485 |
| }, |
| { |
| "epoch": 14.848484848484848, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 7.659380091738652e-06, |
| "loss": 0.0592, |
| "step": 490 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 7.406579144879779e-06, |
| "loss": 0.0601, |
| "step": 495 |
| }, |
| { |
| "epoch": 15.151515151515152, |
| "grad_norm": 0.1455078125, |
| "learning_rate": 7.159485216106013e-06, |
| "loss": 0.0616, |
| "step": 500 |
| }, |
| { |
| "epoch": 15.303030303030303, |
| "grad_norm": 0.146484375, |
| "learning_rate": 6.918253381218046e-06, |
| "loss": 0.0583, |
| "step": 505 |
| }, |
| { |
| "epoch": 15.454545454545455, |
| "grad_norm": 0.15234375, |
| "learning_rate": 6.683035036974742e-06, |
| "loss": 0.0613, |
| "step": 510 |
| }, |
| { |
| "epoch": 15.606060606060606, |
| "grad_norm": 0.1484375, |
| "learning_rate": 6.45397780607673e-06, |
| "loss": 0.0572, |
| "step": 515 |
| }, |
| { |
| "epoch": 15.757575757575758, |
| "grad_norm": 0.1884765625, |
| "learning_rate": 6.23122544451859e-06, |
| "loss": 0.0616, |
| "step": 520 |
| }, |
| { |
| "epoch": 15.909090909090908, |
| "grad_norm": 0.15234375, |
| "learning_rate": 6.014917751367825e-06, |
| "loss": 0.0601, |
| "step": 525 |
| }, |
| { |
| "epoch": 16.060606060606062, |
| "grad_norm": 0.154296875, |
| "learning_rate": 5.80519048102715e-06, |
| "loss": 0.0597, |
| "step": 530 |
| }, |
| { |
| "epoch": 16.21212121212121, |
| "grad_norm": 0.140625, |
| "learning_rate": 5.602175258035204e-06, |
| "loss": 0.0581, |
| "step": 535 |
| }, |
| { |
| "epoch": 16.363636363636363, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 5.4059994944591914e-06, |
| "loss": 0.0617, |
| "step": 540 |
| }, |
| { |
| "epoch": 16.515151515151516, |
| "grad_norm": 0.1416015625, |
| "learning_rate": 5.2167863099312636e-06, |
| "loss": 0.0587, |
| "step": 545 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.14453125, |
| "learning_rate": 5.034654454378783e-06, |
| "loss": 0.0599, |
| "step": 550 |
| }, |
| { |
| "epoch": 16.818181818181817, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 4.859718233497048e-06, |
| "loss": 0.0624, |
| "step": 555 |
| }, |
| { |
| "epoch": 16.96969696969697, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 4.692087437011203e-06, |
| "loss": 0.0589, |
| "step": 560 |
| }, |
| { |
| "epoch": 17.12121212121212, |
| "grad_norm": 0.1533203125, |
| "learning_rate": 4.5318672697723665e-06, |
| "loss": 0.0624, |
| "step": 565 |
| }, |
| { |
| "epoch": 17.272727272727273, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 4.3791582857311975e-06, |
| "loss": 0.0603, |
| "step": 570 |
| }, |
| { |
| "epoch": 17.424242424242426, |
| "grad_norm": 0.142578125, |
| "learning_rate": 4.2340563248303915e-06, |
| "loss": 0.0621, |
| "step": 575 |
| }, |
| { |
| "epoch": 17.575757575757574, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 4.096652452855675e-06, |
| "loss": 0.0608, |
| "step": 580 |
| }, |
| { |
| "epoch": 17.727272727272727, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 3.967032904283021e-06, |
| "loss": 0.06, |
| "step": 585 |
| }, |
| { |
| "epoch": 17.87878787878788, |
| "grad_norm": 0.1484375, |
| "learning_rate": 3.8452790281580445e-06, |
| "loss": 0.0605, |
| "step": 590 |
| }, |
| { |
| "epoch": 18.03030303030303, |
| "grad_norm": 0.154296875, |
| "learning_rate": 3.731467237041433e-06, |
| "loss": 0.0601, |
| "step": 595 |
| }, |
| { |
| "epoch": 18.181818181818183, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 3.6256689590525444e-06, |
| "loss": 0.0628, |
| "step": 600 |
| }, |
| { |
| "epoch": 18.333333333333332, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 3.5279505930412164e-06, |
| "loss": 0.062, |
| "step": 605 |
| }, |
| { |
| "epoch": 18.484848484848484, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.4383734669159366e-06, |
| "loss": 0.0618, |
| "step": 610 |
| }, |
| { |
| "epoch": 18.636363636363637, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 3.356993799154545e-06, |
| "loss": 0.059, |
| "step": 615 |
| }, |
| { |
| "epoch": 18.78787878787879, |
| "grad_norm": 0.146484375, |
| "learning_rate": 3.2838626635215874e-06, |
| "loss": 0.0595, |
| "step": 620 |
| }, |
| { |
| "epoch": 18.939393939393938, |
| "grad_norm": 0.1611328125, |
| "learning_rate": 3.2190259570144957e-06, |
| "loss": 0.0629, |
| "step": 625 |
| }, |
| { |
| "epoch": 19.09090909090909, |
| "grad_norm": 0.150390625, |
| "learning_rate": 3.162524371058697e-06, |
| "loss": 0.0612, |
| "step": 630 |
| }, |
| { |
| "epoch": 19.242424242424242, |
| "grad_norm": 0.1337890625, |
| "learning_rate": 3.1143933659697377e-06, |
| "loss": 0.0583, |
| "step": 635 |
| }, |
| { |
| "epoch": 19.393939393939394, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 3.0746631486984266e-06, |
| "loss": 0.0626, |
| "step": 640 |
| }, |
| { |
| "epoch": 19.545454545454547, |
| "grad_norm": 0.15234375, |
| "learning_rate": 3.043358653873013e-06, |
| "loss": 0.0589, |
| "step": 645 |
| }, |
| { |
| "epoch": 19.696969696969695, |
| "grad_norm": 0.1572265625, |
| "learning_rate": 3.020499528150232e-06, |
| "loss": 0.0586, |
| "step": 650 |
| }, |
| { |
| "epoch": 19.848484848484848, |
| "grad_norm": 0.15625, |
| "learning_rate": 3.006100117885101e-06, |
| "loss": 0.0591, |
| "step": 655 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 3.000169460127164e-06, |
| "loss": 0.0613, |
| "step": 660 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 660, |
| "total_flos": 3.880913653947433e+18, |
| "train_loss": 0.06725140679063218, |
| "train_runtime": 3002.848, |
| "train_samples_per_second": 27.794, |
| "train_steps_per_second": 0.22 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 660, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.880913653947433e+18, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|